Upgrade mathlib to latest

Latest SourceSDK MathLib with minor modifications.
2025-02-09 19:15:03 +01:00 · 2022-07-08 00:55:01 +02:00 · 2022-07-08 00:55:01 +02:00 · 86c9ac5292
commit 86c9ac5292
parent d8a45ae563
37 changed files with 9609 additions and 1368 deletions
--- a/r5dev/bonesetup/bone_utils.cpp
+++ b/r5dev/bonesetup/bone_utils.cpp
@ -0,0 +1,101 @@
+//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
+//
+// Purpose: 
+//
+// $NoKeywords: $
+//
+//===========================================================================//
+
+#include "core/stdafx.h"
+#include "mathlib/mathlib.h"
+
+//-----------------------------------------------------------------------------
+// Purpose: qt = ( s * p ) * q, normalized. Runs QuaternionScale on p with s, QuaternionMult with q, QuaternionNormalize on the product, then copies the four components into qt.
+//-----------------------------------------------------------------------------
+void QuaternionSM(float s, const Quaternion& p, const Quaternion& q, Quaternion& qt)
+{
+	Quaternion		p1, q1; // temporaries: scaled p, then the product
+
+	QuaternionScale(p, s, p1); // p1 = s * p (per the Purpose formula)
+	QuaternionMult(p1, q, q1); // q1 = p1 * q
+	QuaternionNormalize(q1); // normalize the temporary before it is copied out
+	qt[0] = q1[0]; // qt is written only after all math is done
+	qt[1] = q1[1];
+	qt[2] = q1[2];
+	qt[3] = q1[3];
+}
+
+#if ALLOW_SIMD_QUATERNION_MATH
+FORCEINLINE fltx4 QuaternionSMSIMD(const fltx4& s, const fltx4& p, const fltx4& q) // fltx4 counterpart of QuaternionSM; the scale s arrives as an fltx4
+{
+	fltx4 p1, q1, result;
+	p1 = QuaternionScaleSIMD(p, s); // same step order as QuaternionSM: scale p by s,
+	q1 = QuaternionMultSIMD(p1, q); // multiply the scaled value by q,
+	result = QuaternionNormalizeSIMD(q1); // then normalize
+	return result; // returned by value instead of through an out-parameter
+}
+
+FORCEINLINE fltx4 QuaternionSMSIMD(float s, const fltx4& p, const fltx4& q) // convenience overload taking the scale as a plain float
+{
+	return QuaternionSMSIMD(ReplicateX4(s), p, q); // converts s with ReplicateX4 and forwards to the fltx4-scale overload
+}
+#endif
+
+//-----------------------------------------------------------------------------
+// Purpose: qt = p * ( s * q ), normalized. Runs QuaternionScale on q with s, QuaternionMult of p with that result, QuaternionNormalize on the product, then copies the four components into qt.
+//-----------------------------------------------------------------------------
+void QuaternionMA(const Quaternion& p, float s, const Quaternion& q, Quaternion& qt)
+{
+	Quaternion p1, q1; // temporaries: q1 holds the scaled q, p1 the product
+
+	QuaternionScale(q, s, q1); // q1 = s * q (per the Purpose formula)
+	QuaternionMult(p, q1, p1); // p1 = p * q1
+	QuaternionNormalize(p1); // normalize the temporary before it is copied out
+	qt[0] = p1[0]; // qt is written only after all math is done
+	qt[1] = p1[1];
+	qt[2] = p1[2];
+	qt[3] = p1[3];
+}
+
+#if ALLOW_SIMD_QUATERNION_MATH
+
+FORCEINLINE fltx4 QuaternionMASIMD(const fltx4& p, const fltx4& s, const fltx4& q) // fltx4 counterpart of QuaternionMA; the scale s arrives as an fltx4
+{
+	fltx4 p1, q1, result;
+	q1 = QuaternionScaleSIMD(q, s); // same step order as QuaternionMA: scale q by s,
+	p1 = QuaternionMultSIMD(p, q1); // multiply p by the scaled value,
+	result = QuaternionNormalizeSIMD(p1); // then normalize
+	return result; // returned by value instead of through an out-parameter
+}
+
+FORCEINLINE fltx4 QuaternionMASIMD(const fltx4& p, float s, const fltx4& q) // convenience overload taking the scale as a plain float
+{
+	return QuaternionMASIMD(p, ReplicateX4(s), q); // converts s with ReplicateX4 and forwards to the fltx4-scale overload
+}
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Purpose: qt = p + s * q2, where q2 is q after QuaternionAlign( p, q, q2 ). Component-wise sum; no normalization is performed in this function.
+//-----------------------------------------------------------------------------
+void QuaternionAccumulate(const Quaternion& p, float s, const Quaternion& q, Quaternion& qt)
+{
+	Quaternion q2; // receives the aligned copy of q
+	QuaternionAlign(p, q, q2); // presumably picks the sign of q closest to p -- confirm against QuaternionAlign
+
+	qt[0] = p[0] + s * q2[0]; // plain per-component multiply-add
+	qt[1] = p[1] + s * q2[1];
+	qt[2] = p[2] + s * q2[2];
+	qt[3] = p[3] + s * q2[3];
+}
+
+#if ALLOW_SIMD_QUATERNION_MATH
+FORCEINLINE fltx4 QuaternionAccumulateSIMD(const fltx4& p, float s, const fltx4& q) // fltx4 counterpart of QuaternionAccumulate; result is returned by value
+{
+	fltx4 q2, s4, result;
+	q2 = QuaternionAlignSIMD(p, q); // align q against p first, as the scalar version does
+	s4 = ReplicateX4(s); // scale converted to an fltx4 for the SIMD multiply-add
+	result = MaddSIMD(s4, q2, p); // presumably s4 * q2 + p, matching the scalar formula -- confirm MaddSIMD operand order
+	return result; // not normalized here, same as the scalar version
+}
+#endif
--- a/r5dev/core/init.cpp
+++ b/r5dev/core/init.cpp
@ -35,6 +35,7 @@
 #ifndef DEDICATED
 #include "milessdk/win64_rrthreads.h"
 #endif // !DEDICATED
+#include "mathlib/mathlib.h"
 #include "vphysics/QHull.h"
 #include "bsplib/bsplib.h"
 #include "materialsystem/cmaterialsystem.h"
@ -118,9 +119,10 @@ void Systems_Init()
 {
 	spdlog::info("+-------------------------------------------------------------+\n");
 	QuerySystemInfo();
-	CFastTimer initTimer;

+	CFastTimer initTimer;
 	initTimer.Start();
+
 	for (IDetour* pDetour : vDetour)
 	{
 		pDetour->GetCon();
@ -128,13 +130,14 @@ void Systems_Init()
 		pDetour->GetVar();
 	}
 	initTimer.End();
+
 	spdlog::info("+-------------------------------------------------------------+\n");
 	spdlog::info("Detour->Init()   '{:10.6f}' seconds ('{:12d}' clocks)\n", initTimer.GetDuration().GetSeconds(), initTimer.GetDuration().GetCycles());

 	initTimer.Start();

-	// Initialize WinSock system.
-	WS_Init();
+	WS_Init();      // Initialize WinSock.
+	MathLib_Init(); // Initialize MathLib.

 	// Begin the detour transaction to hook the the process
 	DetourTransactionBegin();
@ -404,11 +407,14 @@ void QuerySystemInfo()
 			std::system_category().message(static_cast<int>(::GetLastError())));
 	}

-	if (!(pi.m_bSSE && pi.m_bSSE2))
+	if (!s_bMathlibInitialized)
 	{
-		if (MessageBoxA(NULL, "SSE and SSE2 are required.", "Unsupported CPU", MB_ICONERROR | MB_OK))
+		if (!(pi.m_bSSE && pi.m_bSSE2))
 		{
-			TerminateProcess(GetCurrentProcess(), 0xBAD0C0DE);
+			if (MessageBoxA(NULL, "SSE and SSE2 are required.", "Unsupported CPU", MB_ICONERROR | MB_OK))
+			{
+				TerminateProcess(GetCurrentProcess(), 0xBAD0C0DE);
+			}
 		}
 	}
 }
--- a/r5dev/mathlib/almostequal.cpp
+++ b/r5dev/mathlib/almostequal.cpp
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright © 1996-2008, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: Fast ways to compare equality of two floats.  Assumes 
 // sizeof(float) == sizeof(int) and we are using IEEE format.
--- a/r5dev/mathlib/color_conversion.cpp
+++ b/r5dev/mathlib/color_conversion.cpp
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: Color conversion routines.
 //
@ -34,71 +34,71 @@ static float	g_Mathlib_LinearToGamma[256];	// linear (0..1) to gamma (0..1)
 // TODO: move this into the one DLL that actually uses it, instead of statically
 // linking it everywhere via mathlib.
 ALIGN128 float	power2_n[256] = 			// 2**(index - 128) / 255
-{ 
-	1.152445441982634800E-041, 2.304890883965269600E-041, 4.609781767930539200E-041, 9.219563535861078400E-041, 
+{
+	1.152445441982634800E-041, 2.304890883965269600E-041, 4.609781767930539200E-041, 9.219563535861078400E-041,
 	1.843912707172215700E-040, 3.687825414344431300E-040, 7.375650828688862700E-040, 1.475130165737772500E-039,
-	2.950260331475545100E-039, 5.900520662951090200E-039, 1.180104132590218000E-038, 2.360208265180436100E-038, 
-	4.720416530360872100E-038, 9.440833060721744200E-038, 1.888166612144348800E-037, 3.776333224288697700E-037, 
-	7.552666448577395400E-037, 1.510533289715479100E-036, 3.021066579430958200E-036, 6.042133158861916300E-036, 
-	1.208426631772383300E-035, 2.416853263544766500E-035, 4.833706527089533100E-035, 9.667413054179066100E-035, 
-	1.933482610835813200E-034, 3.866965221671626400E-034, 7.733930443343252900E-034, 1.546786088668650600E-033, 
-	3.093572177337301200E-033, 6.187144354674602300E-033, 1.237428870934920500E-032, 2.474857741869840900E-032, 
-	4.949715483739681800E-032, 9.899430967479363700E-032, 1.979886193495872700E-031, 3.959772386991745500E-031, 
-	7.919544773983491000E-031, 1.583908954796698200E-030, 3.167817909593396400E-030, 6.335635819186792800E-030, 
-	1.267127163837358600E-029, 2.534254327674717100E-029, 5.068508655349434200E-029, 1.013701731069886800E-028, 
-	2.027403462139773700E-028, 4.054806924279547400E-028, 8.109613848559094700E-028, 1.621922769711818900E-027, 
-	3.243845539423637900E-027, 6.487691078847275800E-027, 1.297538215769455200E-026, 2.595076431538910300E-026, 
-	5.190152863077820600E-026, 1.038030572615564100E-025, 2.076061145231128300E-025, 4.152122290462256500E-025, 
-	8.304244580924513000E-025, 1.660848916184902600E-024, 3.321697832369805200E-024, 6.643395664739610400E-024, 
-	1.328679132947922100E-023, 2.657358265895844200E-023, 5.314716531791688300E-023, 1.062943306358337700E-022, 
-	2.125886612716675300E-022, 4.251773225433350700E-022, 8.503546450866701300E-022, 1.700709290173340300E-021, 
-	3.401418580346680500E-021, 6.802837160693361100E-021, 1.360567432138672200E-020, 2.721134864277344400E-020, 
-	5.442269728554688800E-020, 1.088453945710937800E-019, 2.176907891421875500E-019, 4.353815782843751100E-019, 
-	8.707631565687502200E-019, 1.741526313137500400E-018, 3.483052626275000900E-018, 6.966105252550001700E-018, 
-	1.393221050510000300E-017, 2.786442101020000700E-017, 5.572884202040001400E-017, 1.114576840408000300E-016, 
-	2.229153680816000600E-016, 4.458307361632001100E-016, 8.916614723264002200E-016, 1.783322944652800400E-015, 
-	3.566645889305600900E-015, 7.133291778611201800E-015, 1.426658355722240400E-014, 2.853316711444480700E-014, 
-	5.706633422888961400E-014, 1.141326684577792300E-013, 2.282653369155584600E-013, 4.565306738311169100E-013, 
-	9.130613476622338300E-013, 1.826122695324467700E-012, 3.652245390648935300E-012, 7.304490781297870600E-012, 
-	1.460898156259574100E-011, 2.921796312519148200E-011, 5.843592625038296500E-011, 1.168718525007659300E-010, 
-	2.337437050015318600E-010, 4.674874100030637200E-010, 9.349748200061274400E-010, 1.869949640012254900E-009, 
-	3.739899280024509800E-009, 7.479798560049019500E-009, 1.495959712009803900E-008, 2.991919424019607800E-008, 
-	5.983838848039215600E-008, 1.196767769607843100E-007, 2.393535539215686200E-007, 4.787071078431372500E-007, 
-	9.574142156862745000E-007, 1.914828431372549000E-006, 3.829656862745098000E-006, 7.659313725490196000E-006, 
-	1.531862745098039200E-005, 3.063725490196078400E-005, 6.127450980392156800E-005, 1.225490196078431400E-004, 
-	2.450980392156862700E-004, 4.901960784313725400E-004, 9.803921568627450800E-004, 1.960784313725490200E-003, 
-	3.921568627450980300E-003, 7.843137254901960700E-003, 1.568627450980392100E-002, 3.137254901960784300E-002, 
-	6.274509803921568500E-002, 1.254901960784313700E-001, 2.509803921568627400E-001, 5.019607843137254800E-001, 
-	1.003921568627451000E+000, 2.007843137254901900E+000, 4.015686274509803900E+000, 8.031372549019607700E+000, 
-	1.606274509803921500E+001, 3.212549019607843100E+001, 6.425098039215686200E+001, 1.285019607843137200E+002, 
-	2.570039215686274500E+002, 5.140078431372548900E+002, 1.028015686274509800E+003, 2.056031372549019600E+003, 
-	4.112062745098039200E+003, 8.224125490196078300E+003, 1.644825098039215700E+004, 3.289650196078431300E+004, 
-	6.579300392156862700E+004, 1.315860078431372500E+005, 2.631720156862745100E+005, 5.263440313725490100E+005, 
-	1.052688062745098000E+006, 2.105376125490196000E+006, 4.210752250980392100E+006, 8.421504501960784200E+006, 
-	1.684300900392156800E+007, 3.368601800784313700E+007, 6.737203601568627400E+007, 1.347440720313725500E+008, 
-	2.694881440627450900E+008, 5.389762881254901900E+008, 1.077952576250980400E+009, 2.155905152501960800E+009, 
-	4.311810305003921500E+009, 8.623620610007843000E+009, 1.724724122001568600E+010, 3.449448244003137200E+010, 
-	6.898896488006274400E+010, 1.379779297601254900E+011, 2.759558595202509800E+011, 5.519117190405019500E+011, 
-	1.103823438081003900E+012, 2.207646876162007800E+012, 4.415293752324015600E+012, 8.830587504648031200E+012, 
-	1.766117500929606200E+013, 3.532235001859212500E+013, 7.064470003718425000E+013, 1.412894000743685000E+014, 
-	2.825788001487370000E+014, 5.651576002974740000E+014, 1.130315200594948000E+015, 2.260630401189896000E+015, 
-	4.521260802379792000E+015, 9.042521604759584000E+015, 1.808504320951916800E+016, 3.617008641903833600E+016, 
-	7.234017283807667200E+016, 1.446803456761533400E+017, 2.893606913523066900E+017, 5.787213827046133800E+017, 
-	1.157442765409226800E+018, 2.314885530818453500E+018, 4.629771061636907000E+018, 9.259542123273814000E+018, 
-	1.851908424654762800E+019, 3.703816849309525600E+019, 7.407633698619051200E+019, 1.481526739723810200E+020, 
-	2.963053479447620500E+020, 5.926106958895241000E+020, 1.185221391779048200E+021, 2.370442783558096400E+021, 
-	4.740885567116192800E+021, 9.481771134232385600E+021, 1.896354226846477100E+022, 3.792708453692954200E+022, 
-	7.585416907385908400E+022, 1.517083381477181700E+023, 3.034166762954363400E+023, 6.068333525908726800E+023, 
-	1.213666705181745400E+024, 2.427333410363490700E+024, 4.854666820726981400E+024, 9.709333641453962800E+024, 
-	1.941866728290792600E+025, 3.883733456581585100E+025, 7.767466913163170200E+025, 1.553493382632634000E+026, 
-	3.106986765265268100E+026, 6.213973530530536200E+026, 1.242794706106107200E+027, 2.485589412212214500E+027, 
-	4.971178824424429000E+027, 9.942357648848857900E+027, 1.988471529769771600E+028, 3.976943059539543200E+028, 
-	7.953886119079086300E+028, 1.590777223815817300E+029, 3.181554447631634500E+029, 6.363108895263269100E+029, 
-	1.272621779052653800E+030, 2.545243558105307600E+030, 5.090487116210615300E+030, 1.018097423242123100E+031, 
-	2.036194846484246100E+031, 4.072389692968492200E+031, 8.144779385936984400E+031, 1.628955877187396900E+032, 
-	3.257911754374793800E+032, 6.515823508749587500E+032, 1.303164701749917500E+033, 2.606329403499835000E+033, 
-	5.212658806999670000E+033, 1.042531761399934000E+034, 2.085063522799868000E+034, 4.170127045599736000E+034, 
-	8.340254091199472000E+034, 1.668050818239894400E+035, 3.336101636479788800E+035, 6.672203272959577600E+035 
+	2.950260331475545100E-039, 5.900520662951090200E-039, 1.180104132590218000E-038, 2.360208265180436100E-038,
+	4.720416530360872100E-038, 9.440833060721744200E-038, 1.888166612144348800E-037, 3.776333224288697700E-037,
+	7.552666448577395400E-037, 1.510533289715479100E-036, 3.021066579430958200E-036, 6.042133158861916300E-036,
+	1.208426631772383300E-035, 2.416853263544766500E-035, 4.833706527089533100E-035, 9.667413054179066100E-035,
+	1.933482610835813200E-034, 3.866965221671626400E-034, 7.733930443343252900E-034, 1.546786088668650600E-033,
+	3.093572177337301200E-033, 6.187144354674602300E-033, 1.237428870934920500E-032, 2.474857741869840900E-032,
+	4.949715483739681800E-032, 9.899430967479363700E-032, 1.979886193495872700E-031, 3.959772386991745500E-031,
+	7.919544773983491000E-031, 1.583908954796698200E-030, 3.167817909593396400E-030, 6.335635819186792800E-030,
+	1.267127163837358600E-029, 2.534254327674717100E-029, 5.068508655349434200E-029, 1.013701731069886800E-028,
+	2.027403462139773700E-028, 4.054806924279547400E-028, 8.109613848559094700E-028, 1.621922769711818900E-027,
+	3.243845539423637900E-027, 6.487691078847275800E-027, 1.297538215769455200E-026, 2.595076431538910300E-026,
+	5.190152863077820600E-026, 1.038030572615564100E-025, 2.076061145231128300E-025, 4.152122290462256500E-025,
+	8.304244580924513000E-025, 1.660848916184902600E-024, 3.321697832369805200E-024, 6.643395664739610400E-024,
+	1.328679132947922100E-023, 2.657358265895844200E-023, 5.314716531791688300E-023, 1.062943306358337700E-022,
+	2.125886612716675300E-022, 4.251773225433350700E-022, 8.503546450866701300E-022, 1.700709290173340300E-021,
+	3.401418580346680500E-021, 6.802837160693361100E-021, 1.360567432138672200E-020, 2.721134864277344400E-020,
+	5.442269728554688800E-020, 1.088453945710937800E-019, 2.176907891421875500E-019, 4.353815782843751100E-019,
+	8.707631565687502200E-019, 1.741526313137500400E-018, 3.483052626275000900E-018, 6.966105252550001700E-018,
+	1.393221050510000300E-017, 2.786442101020000700E-017, 5.572884202040001400E-017, 1.114576840408000300E-016,
+	2.229153680816000600E-016, 4.458307361632001100E-016, 8.916614723264002200E-016, 1.783322944652800400E-015,
+	3.566645889305600900E-015, 7.133291778611201800E-015, 1.426658355722240400E-014, 2.853316711444480700E-014,
+	5.706633422888961400E-014, 1.141326684577792300E-013, 2.282653369155584600E-013, 4.565306738311169100E-013,
+	9.130613476622338300E-013, 1.826122695324467700E-012, 3.652245390648935300E-012, 7.304490781297870600E-012,
+	1.460898156259574100E-011, 2.921796312519148200E-011, 5.843592625038296500E-011, 1.168718525007659300E-010,
+	2.337437050015318600E-010, 4.674874100030637200E-010, 9.349748200061274400E-010, 1.869949640012254900E-009,
+	3.739899280024509800E-009, 7.479798560049019500E-009, 1.495959712009803900E-008, 2.991919424019607800E-008,
+	5.983838848039215600E-008, 1.196767769607843100E-007, 2.393535539215686200E-007, 4.787071078431372500E-007,
+	9.574142156862745000E-007, 1.914828431372549000E-006, 3.829656862745098000E-006, 7.659313725490196000E-006,
+	1.531862745098039200E-005, 3.063725490196078400E-005, 6.127450980392156800E-005, 1.225490196078431400E-004,
+	2.450980392156862700E-004, 4.901960784313725400E-004, 9.803921568627450800E-004, 1.960784313725490200E-003,
+	3.921568627450980300E-003, 7.843137254901960700E-003, 1.568627450980392100E-002, 3.137254901960784300E-002,
+	6.274509803921568500E-002, 1.254901960784313700E-001, 2.509803921568627400E-001, 5.019607843137254800E-001,
+	1.003921568627451000E+000, 2.007843137254901900E+000, 4.015686274509803900E+000, 8.031372549019607700E+000,
+	1.606274509803921500E+001, 3.212549019607843100E+001, 6.425098039215686200E+001, 1.285019607843137200E+002,
+	2.570039215686274500E+002, 5.140078431372548900E+002, 1.028015686274509800E+003, 2.056031372549019600E+003,
+	4.112062745098039200E+003, 8.224125490196078300E+003, 1.644825098039215700E+004, 3.289650196078431300E+004,
+	6.579300392156862700E+004, 1.315860078431372500E+005, 2.631720156862745100E+005, 5.263440313725490100E+005,
+	1.052688062745098000E+006, 2.105376125490196000E+006, 4.210752250980392100E+006, 8.421504501960784200E+006,
+	1.684300900392156800E+007, 3.368601800784313700E+007, 6.737203601568627400E+007, 1.347440720313725500E+008,
+	2.694881440627450900E+008, 5.389762881254901900E+008, 1.077952576250980400E+009, 2.155905152501960800E+009,
+	4.311810305003921500E+009, 8.623620610007843000E+009, 1.724724122001568600E+010, 3.449448244003137200E+010,
+	6.898896488006274400E+010, 1.379779297601254900E+011, 2.759558595202509800E+011, 5.519117190405019500E+011,
+	1.103823438081003900E+012, 2.207646876162007800E+012, 4.415293752324015600E+012, 8.830587504648031200E+012,
+	1.766117500929606200E+013, 3.532235001859212500E+013, 7.064470003718425000E+013, 1.412894000743685000E+014,
+	2.825788001487370000E+014, 5.651576002974740000E+014, 1.130315200594948000E+015, 2.260630401189896000E+015,
+	4.521260802379792000E+015, 9.042521604759584000E+015, 1.808504320951916800E+016, 3.617008641903833600E+016,
+	7.234017283807667200E+016, 1.446803456761533400E+017, 2.893606913523066900E+017, 5.787213827046133800E+017,
+	1.157442765409226800E+018, 2.314885530818453500E+018, 4.629771061636907000E+018, 9.259542123273814000E+018,
+	1.851908424654762800E+019, 3.703816849309525600E+019, 7.407633698619051200E+019, 1.481526739723810200E+020,
+	2.963053479447620500E+020, 5.926106958895241000E+020, 1.185221391779048200E+021, 2.370442783558096400E+021,
+	4.740885567116192800E+021, 9.481771134232385600E+021, 1.896354226846477100E+022, 3.792708453692954200E+022,
+	7.585416907385908400E+022, 1.517083381477181700E+023, 3.034166762954363400E+023, 6.068333525908726800E+023,
+	1.213666705181745400E+024, 2.427333410363490700E+024, 4.854666820726981400E+024, 9.709333641453962800E+024,
+	1.941866728290792600E+025, 3.883733456581585100E+025, 7.767466913163170200E+025, 1.553493382632634000E+026,
+	3.106986765265268100E+026, 6.213973530530536200E+026, 1.242794706106107200E+027, 2.485589412212214500E+027,
+	4.971178824424429000E+027, 9.942357648848857900E+027, 1.988471529769771600E+028, 3.976943059539543200E+028,
+	7.953886119079086300E+028, 1.590777223815817300E+029, 3.181554447631634500E+029, 6.363108895263269100E+029,
+	1.272621779052653800E+030, 2.545243558105307600E+030, 5.090487116210615300E+030, 1.018097423242123100E+031,
+	2.036194846484246100E+031, 4.072389692968492200E+031, 8.144779385936984400E+031, 1.628955877187396900E+032,
+	3.257911754374793800E+032, 6.515823508749587500E+032, 1.303164701749917500E+033, 2.606329403499835000E+033,
+	5.212658806999670000E+033, 1.042531761399934000E+034, 2.085063522799868000E+034, 4.170127045599736000E+034,
+	8.340254091199472000E+034, 1.668050818239894400E+035, 3.336101636479788800E+035, 6.672203272959577600E+035
 };

 // You can use this to double check the exponent table and assert that 
@ -108,20 +108,20 @@ ALIGN128 float	power2_n[256] = 			// 2**(index - 128) / 255
 #pragma warning( disable : 4189 ) // disable unused local variable warning
 static void CheckExponentTable()
 {
-	for( int i = 0; i < 256; i++ )
+	for (int i = 0; i < 256; i++)
 	{
-		float testAgainst = pow( 2.0f, i - 128 ) / 255.0f;
-		float diff = testAgainst - power2_n[i] ;
+		float testAgainst = pow(2.0f, i - 128) / 255.0f;
+		float diff = testAgainst - power2_n[i];
 		float relativeDiff = diff / testAgainst;
-		Assert( testAgainst == 0 ? 
-				power2_n[i] < 1.16E-041 :
-				power2_n[i] == testAgainst );
+		Assert(testAgainst == 0 ?
+			power2_n[i] < 1.16E-041 :
+			power2_n[i] == testAgainst);
 	}
 }
 #pragma warning(pop)
 #endif

-void BuildGammaTable( float gamma, float texGamma, float brightness, int overbright )
+void BuildGammaTable(float gamma, float texGamma, float brightness, int overbright)
 {
 	int		i, inf;
 	float	g1, g3;
@ -129,30 +129,30 @@ void BuildGammaTable( float gamma, float texGamma, float brightness, int overbri
 	// Con_Printf("BuildGammaTable %.1f %.1f %.1f\n", g, v_lightgamma.GetFloat(), v_texgamma.GetFloat() );

 	float g = gamma;
-	if (g > 3.0) 
+	if (g > 3.0)
 	{
 		g = 3.0;
 	}

 	g = 1.0 / g;
-	g1 = texGamma * g; 
+	g1 = texGamma * g;

-	if (brightness <= 0.0) 
+	if (brightness <= 0.0)
 	{
 		g3 = 0.125;
 	}
-	else if (brightness > 1.0) 
+	else if (brightness > 1.0)
 	{
 		g3 = 0.05;
 	}
-	else 
+	else
 	{
 		g3 = 0.125 - (brightness * brightness) * 0.075;
 	}

-	for (i=0 ; i<256 ; i++)
+	for (i = 0; i < 256; i++)
 	{
-		inf = 255 * pow ( i/255.f, g1 ); 
+		inf = (int)(255 * pow(i / 255.f, g1));
 		if (inf < 0)
 			inf = 0;
 		if (inf > 255)
@ -160,7 +160,7 @@ void BuildGammaTable( float gamma, float texGamma, float brightness, int overbri
 		texgammatable[i] = inf;
 	}

-	for (i=0 ; i<1024 ; i++)
+	for (i = 0; i < 1024; i++)
 	{
 		float f;

@ -173,11 +173,11 @@ void BuildGammaTable( float gamma, float texGamma, float brightness, int overbri
 		// shift up
 		if (f <= g3)
 			f = (f / g3) * 0.125;
-		else 
+		else
 			f = 0.125 + ((f - g3) / (1.0 - g3)) * 0.875;

 		// convert linear space to desired gamma space
-		inf = 255 * pow ( f, g ); 
+		inf = (int)(255 * pow(f, g));

 		if (inf < 0)
 			inf = 0;
@ -196,32 +196,32 @@ void BuildGammaTable( float gamma, float texGamma, float brightness, int overbri
 	}
 	*/

-	for (i=0 ; i<256 ; i++)
+	for (i = 0; i < 256; i++)
 	{
 		// convert from nonlinear texture space (0..255) to linear space (0..1)
-		texturetolinear[i] =  pow( i / 255.f, texGamma );
+		texturetolinear[i] = pow(i / 255.f, texGamma);

 		// convert from linear space (0..1) to nonlinear (sRGB) space (0..1)
-		g_Mathlib_LinearToGamma[i] =  LinearToGammaFullRange( i / 255.f );
+		g_Mathlib_LinearToGamma[i] = LinearToGammaFullRange(i / 255.f);

 		// convert from sRGB gamma space (0..1) to linear space (0..1)
-		g_Mathlib_GammaToLinear[i] =  GammaToLinearFullRange( i / 255.f );
+		g_Mathlib_GammaToLinear[i] = GammaToLinearFullRange(i / 255.f);
 	}

-	for (i=0 ; i<1024 ; i++)
+	for (i = 0; i < 1024; i++)
 	{
 		// convert from linear space (0..1) to nonlinear texture space (0..255)
-		lineartotexture[i] =  pow( i / 1023.0, 1.0 / texGamma ) * 255;
+		lineartotexture[i] = (int)(pow(i / 1023.0, 1.0 / texGamma) * 255); // scale to 0..255 BEFORE truncating; casting pow() alone gives 0 for every i < 1023
 	}

 #if 0
-	for (i=0 ; i<256 ; i++)
+	for (i = 0; i < 256; i++)
 	{
 		float f;

 		// convert from nonlinear lightmap space (0..255) to linear space (0..4)
 		// f =  (i / 255.0) * sqrt( 4 );
-		f =  i * (2.0 / 255.0);
+		f = i * (2.0 / 255.0);
 		f = f * f;

 		texlighttolinear[i] = f;
@ -234,50 +234,50 @@ void BuildGammaTable( float gamma, float texGamma, float brightness, int overbri

 		// Can't do overbright without texcombine
 		// UNDONE: Add GAMMA ramp to rectify this
-		if ( overbright == 2 )
+		if (overbright == 2)
 		{
 			overbrightFactor = 0.5;
 		}
-		else if ( overbright == 4 )
+		else if (overbright == 4)
 		{
 			overbrightFactor = 0.25;
 		}

-		for (i=0 ; i<4096 ; i++)
+		for (i = 0; i < 4096; i++)
 		{
 			// convert from linear 0..4 (x1024) to screen corrected vertex space (0..1?)
-			f = pow ( i/1024.0, 1.0 / gamma );
+			f = pow(i / 1024.0, 1.0 / gamma);

 			lineartovertex[i] = f * overbrightFactor;
 			if (lineartovertex[i] > 1)
 				lineartovertex[i] = 1;

-			int nLightmap = RoundFloatToInt( f * 255 * overbrightFactor );
-			nLightmap = clamp( nLightmap, 0, 255 );
+			int nLightmap = RoundFloatToInt(f * 255 * overbrightFactor);
+			nLightmap = clamp(nLightmap, 0, 255);
 			lineartolightmap[i] = (unsigned char)nLightmap;
 		}
 	}
 }

-float GammaToLinearFullRange( float gamma )
+float GammaToLinearFullRange(float gamma)
 {
-	return pow( gamma, 2.2f );
+	return pow(gamma, 2.2f);
 }

-float LinearToGammaFullRange( float linear )
+float LinearToGammaFullRange(float linear)
 {
-	return pow( linear, 1.0f / 2.2f );
+	return pow(linear, 1.0f / 2.2f);
 }

-float GammaToLinear( float gamma )
+float GammaToLinear(float gamma)
 {
-	Assert( s_bMathlibInitialized );
-	if ( gamma < 0.0f )
+	Assert(s_bMathlibInitialized);
+	if (gamma < 0.0f)
 	{
 		return 0.0f;
 	}

-	if ( gamma >= 0.95f )
+	if (gamma >= 0.95f)
 	{
 		// Use GammaToLinearFullRange maybe if you trip this.
 // X360TEMP
@ -285,129 +285,129 @@ float GammaToLinear( float gamma )
 		return 1.0f;
 	}

-	int index = RoundFloatToInt( gamma * 255.0f );
-	Assert( index >= 0 && index < 256 );
+	int index = RoundFloatToInt(gamma * 255.0f);
+	Assert(index >= 0 && index < 256);
 	return g_Mathlib_GammaToLinear[index];
 }

-float LinearToGamma( float linear )
+float LinearToGamma(float linear)
 {
-	Assert( s_bMathlibInitialized );
-	if ( linear < 0.0f )
+	Assert(s_bMathlibInitialized);
+	if (linear < 0.0f)
 	{
 		return 0.0f;
 	}
-	if ( linear > 1.0f )
+	if (linear > 1.0f)
 	{
 		// Use LinearToGammaFullRange maybe if you trip this.
-		Assert( 0 );
+		Assert(0);
 		return 1.0f;
 	}

-	int index = RoundFloatToInt( linear * 255.0f );
-	Assert( index >= 0 && index < 256 );
+	int index = RoundFloatToInt(linear * 255.0f);
+	Assert(index >= 0 && index < 256);
 	return g_Mathlib_LinearToGamma[index];
 }

 //-----------------------------------------------------------------------------
 // Helper functions to convert between sRGB and 360 gamma space
 //-----------------------------------------------------------------------------
-float SrgbGammaToLinear( float flSrgbGammaValue )
+float SrgbGammaToLinear(float flSrgbGammaValue)
 {
-	float x = clamp( flSrgbGammaValue, 0.0f, 1.0f );
-	return ( x <= 0.04045f ) ? ( x / 12.92f ) : ( pow( ( x + 0.055f ) / 1.055f, 2.4f ) );
+	float x = clamp(flSrgbGammaValue, 0.0f, 1.0f);
+	return (x <= 0.04045f) ? (x / 12.92f) : (pow((x + 0.055f) / 1.055f, 2.4f));
 }

-float SrgbLinearToGamma( float flLinearValue )
+float SrgbLinearToGamma(float flLinearValue)
 {
-	float x = clamp( flLinearValue, 0.0f, 1.0f );
-	return ( x <= 0.0031308f ) ? ( x * 12.92f ) : ( 1.055f * pow( x, ( 1.0f / 2.4f ) ) ) - 0.055f;
+	float x = clamp(flLinearValue, 0.0f, 1.0f);
+	return (x <= 0.0031308f) ? (x * 12.92f) : (1.055f * pow(x, (1.0f / 2.4f))) - 0.055f;
 }

-float X360GammaToLinear( float fl360GammaValue )
+float X360GammaToLinear(float fl360GammaValue)
 {
 	float flLinearValue;

-	fl360GammaValue = clamp( fl360GammaValue, 0.0f, 1.0f );
-	if ( fl360GammaValue < ( 96.0f / 255.0f ) )
+	fl360GammaValue = clamp(fl360GammaValue, 0.0f, 1.0f);
+	if (fl360GammaValue < (96.0f / 255.0f))
 	{
-		if ( fl360GammaValue < ( 64.0f / 255.0f ) )
+		if (fl360GammaValue < (64.0f / 255.0f))
 		{
 			flLinearValue = fl360GammaValue * 255.0f;
 		}
 		else
 		{
-			flLinearValue = fl360GammaValue * ( 255.0f * 2.0f ) - 64.0f;
-			flLinearValue += floor( flLinearValue * ( 1.0f / 512.0f ) );
+			flLinearValue = fl360GammaValue * (255.0f * 2.0f) - 64.0f;
+			flLinearValue += floor(flLinearValue * (1.0f / 512.0f));
 		}
 	}
 	else
 	{
-		if( fl360GammaValue < ( 192.0f / 255.0f ) )
+		if (fl360GammaValue < (192.0f / 255.0f))
 		{
-			flLinearValue = fl360GammaValue * ( 255.0f * 4.0f ) - 256.0f;
-			flLinearValue += floor( flLinearValue * ( 1.0f / 256.0f ) );
+			flLinearValue = fl360GammaValue * (255.0f * 4.0f) - 256.0f;
+			flLinearValue += floor(flLinearValue * (1.0f / 256.0f));
 		}
 		else
 		{
-			flLinearValue = fl360GammaValue * ( 255.0f * 8.0f ) - 1024.0f;
-			flLinearValue += floor( flLinearValue * ( 1.0f / 128.0f ) );
+			flLinearValue = fl360GammaValue * (255.0f * 8.0f) - 1024.0f;
+			flLinearValue += floor(flLinearValue * (1.0f / 128.0f));
 		}
 	}

 	flLinearValue *= 1.0f / 1023.0f;

-	flLinearValue = clamp( flLinearValue, 0.0f, 1.0f );
+	flLinearValue = clamp(flLinearValue, 0.0f, 1.0f);
 	return flLinearValue;
 }

-float X360LinearToGamma( float flLinearValue )
+float X360LinearToGamma(float flLinearValue)
 {
 	float fl360GammaValue;

-	flLinearValue = clamp( flLinearValue, 0.0f, 1.0f );
-	if ( flLinearValue < ( 128.0f / 1023.0f ) )
+	flLinearValue = clamp(flLinearValue, 0.0f, 1.0f);
+	if (flLinearValue < (128.0f / 1023.0f))
 	{
-		if ( flLinearValue < ( 64.0f / 1023.0f ) )
+		if (flLinearValue < (64.0f / 1023.0f))
 		{
-			fl360GammaValue = flLinearValue * ( 1023.0f * ( 1.0f / 255.0f ) );
+			fl360GammaValue = flLinearValue * (1023.0f * (1.0f / 255.0f));
 		}
 		else
 		{
-			fl360GammaValue = flLinearValue * ( ( 1023.0f / 2.0f ) * ( 1.0f / 255.0f ) ) + ( 32.0f / 255.0f );
+			fl360GammaValue = flLinearValue * ((1023.0f / 2.0f) * (1.0f / 255.0f)) + (32.0f / 255.0f);
 		}
 	}
 	else
 	{
-		if ( flLinearValue < ( 512.0f / 1023.0f ) )
+		if (flLinearValue < (512.0f / 1023.0f))
 		{
-			fl360GammaValue = flLinearValue * ( ( 1023.0f / 4.0f ) * ( 1.0f / 255.0f ) ) + ( 64.0f / 255.0f );
+			fl360GammaValue = flLinearValue * ((1023.0f / 4.0f) * (1.0f / 255.0f)) + (64.0f / 255.0f);
 		}
 		else
 		{
-			fl360GammaValue = flLinearValue * ( ( 1023.0f /8.0f ) * ( 1.0f / 255.0f ) ) + ( 128.0f /255.0f ); // 1.0 -> 1.0034313725490196078431372549016
-			if ( fl360GammaValue > 1.0f )
+			fl360GammaValue = flLinearValue * ((1023.0f / 8.0f) * (1.0f / 255.0f)) + (128.0f / 255.0f); // 1.0 -> 1.0034313725490196078431372549016
+			if (fl360GammaValue > 1.0f)
 			{
 				fl360GammaValue = 1.0f;
 			}
 		}
 	}

-	fl360GammaValue = clamp( fl360GammaValue, 0.0f, 1.0f );
+	fl360GammaValue = clamp(fl360GammaValue, 0.0f, 1.0f);
 	return fl360GammaValue;
 }

-float SrgbGammaTo360Gamma( float flSrgbGammaValue )
+float SrgbGammaTo360Gamma(float flSrgbGammaValue)
 {
-	float flLinearValue = SrgbGammaToLinear( flSrgbGammaValue );
-	float fl360GammaValue = X360LinearToGamma( flLinearValue );
+	float flLinearValue = SrgbGammaToLinear(flSrgbGammaValue);
+	float fl360GammaValue = X360LinearToGamma(flLinearValue);
 	return fl360GammaValue;
 }

 // convert texture to linear 0..1 value
-float TextureToLinear( int c )
+float TextureToLinear(int c)
 {
-	Assert( s_bMathlibInitialized );
+	Assert(s_bMathlibInitialized);
 	if (c < 0)
 		return 0;
 	if (c > 255)
@ -417,11 +417,11 @@ float TextureToLinear( int c )
 }

 // convert texture to linear 0..1 value
-int LinearToTexture( float f )
+int LinearToTexture(float f)
 {
-	Assert( s_bMathlibInitialized );
+	Assert(s_bMathlibInitialized);
 	int i;
-	i = f * 1023;	// assume 0..1 range
+	i = (int)(f * 1023);	// assume 0..1 range
 	if (i < 0)
 		i = 0;
 	if (i > 1023)
@ -432,11 +432,11 @@ int LinearToTexture( float f )


 // converts 0..1 linear value to screen gamma (0..255)
-int LinearToScreenGamma( float f )
+int LinearToScreenGamma(float f)
 {
-	Assert( s_bMathlibInitialized );
+	Assert(s_bMathlibInitialized);
 	int i;
-	i = f * 1023;	// assume 0..1 range
+	i = (int)(f * 1023);	// assume 0..1 range
 	if (i < 0)
 		i = 0;
 	if (i > 1023)
@ -445,30 +445,30 @@ int LinearToScreenGamma( float f )
 	return lineartoscreen[i];
 }

-void ColorRGBExp32ToVector( const ColorRGBExp32& in, Vector3D& out )
+void ColorRGBExp32ToVector(const ColorRGBExp32& in, Vector3D& out)
 {
-	Assert( s_bMathlibInitialized );
+	Assert(s_bMathlibInitialized);
 	// FIXME: Why is there a factor of 255 built into this?
-	out.x = 255.0f * TexLightToLinear( in.r, in.exponent );
-	out.y = 255.0f * TexLightToLinear( in.g, in.exponent );
-	out.z = 255.0f * TexLightToLinear( in.b, in.exponent );
+	out.x = 255.0f * TexLightToLinear(in.r, in.exponent);
+	out.y = 255.0f * TexLightToLinear(in.g, in.exponent);
+	out.z = 255.0f * TexLightToLinear(in.b, in.exponent);
 }

 #if 0
 // assumes that the desired mantissa range is 128..255
-static int VectorToColorRGBExp32_CalcExponent( float in )
+static int VectorToColorRGBExp32_CalcExponent(float in)
 {
 	int power = 0;
-	
-	if( in != 0.0f )
+
+	if (in != 0.0f)
 	{
-		while( in > 255.0f )
+		while (in > 255.0f)
 		{
 			power += 1;
 			in *= 0.5f;
 		}
-		
-		while( in < 128.0f )
+
+		while (in < 128.0f)
 		{
 			power -= 1;
 			in *= 2.0f;
@ -478,51 +478,51 @@ static int VectorToColorRGBExp32_CalcExponent( float in )
 	return power;
 }

-void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
+void VectorToColorRGBExp32(const Vector3D& vin, ColorRGBExp32& c)
 {
-	Vector v = vin;
-	Assert( s_bMathlibInitialized );
-	Assert( v.x >= 0.0f && v.y >= 0.0f && v.z >= 0.0f );
-	int i;		
-	float max = v[0];				
-	for( i = 1; i < 3; i++ )
+	Vector3D v = vin;
+	Assert(s_bMathlibInitialized);
+	Assert(v.x >= 0.0f && v.y >= 0.0f && v.z >= 0.0f);
+	int i;
+	float max = v[0];
+	for (i = 1; i < 3; i++)
 	{
 		// Get the maximum value.
-		if( v[i] > max )
+		if (v[i] > max)
 		{
 			max = v[i];
 		}
 	}
-				
+
 	// figure out the exponent for this luxel.
-	int exponent = VectorToColorRGBExp32_CalcExponent( max );
-				
+	int exponent = VectorToColorRGBExp32_CalcExponent(max);
+
 	// make the exponent fits into a signed byte.
-	if( exponent < -128 )
+	if (exponent < -128)
 	{
 		exponent = -128;
 	}
-	else if( exponent > 127 )
+	else if (exponent > 127)
 	{
 		exponent = 127;
 	}
-				
+
 	// undone: optimize with a table
-	float scalar = pow( 2.0f, -exponent );
+	float scalar = pow(2.0f, -exponent);
 	// convert to mantissa x 2^exponent format
-	for( i = 0; i < 3; i++ )
+	for (i = 0; i < 3; i++)
 	{
 		v[i] *= scalar;
 		// clamp
-		if( v[i] > 255.0f )
+		if (v[i] > 255.0f)
 		{
 			v[i] = 255.0f;
 		}
 	}
-	c.r = ( unsigned char )v[0];
-	c.g = ( unsigned char )v[1];
-	c.b = ( unsigned char )v[2];
-	c.exponent = ( signed char )exponent;
+	c.r = (unsigned char)v[0];
+	c.g = (unsigned char)v[1];
+	c.b = (unsigned char)v[2];
+	c.exponent = (signed char)exponent;
 }

 #else
@ -531,7 +531,7 @@ void VectorToColorRGBExp32( const Vector& vin, ColorRGBExp32 &c )
 // for f' = f * 2^e,  f is on [128..255].
 // Uses IEEE 754 representation to directly extract this information
 // from the float.
-inline static int VectorToColorRGBExp32_CalcExponent( const float *pin )
+inline static int VectorToColorRGBExp32_CalcExponent(const float* pin)
 {
 	// The thing we will take advantage of here is that the exponent component
 	// is stored in the float itself, and because we want to map to 128..255, we
@ -542,12 +542,12 @@ inline static int VectorToColorRGBExp32_CalcExponent( const float *pin )
 	if (*pin == 0.0f)
 		return 0;

-	unsigned int fbits = *reinterpret_cast<const unsigned int *>(pin);
-	
+	unsigned int fbits = *reinterpret_cast<const unsigned int*>(pin);
+
 	// the exponent component is bits 23..30, and biased by +127
 	const unsigned int biasedSeven = 7 + 127;

-	signed int expComponent = ( fbits & 0x7F800000 ) >> 23;
+	signed int expComponent = (fbits & 0x7F800000) >> 23;
 	expComponent -= biasedSeven; // now the difference from seven (positive if was less than, etc)
 	return expComponent;
 }
@ -561,15 +561,15 @@ inline static int VectorToColorRGBExp32_CalcExponent( const float *pin )
 /// moving it onto the cell.
 /// \warning: Assumes an IEEE 754 single-precision float representation! Those of you
 /// porting to an 8080 are out of luck.
-void VectorToColorRGBExp32( const Vector3D& vin, ColorRGBExp32 &c )
+void VectorToColorRGBExp32(const Vector3D& vin, ColorRGBExp32& c)
 {
-	Assert( s_bMathlibInitialized );
-	Assert( vin.x >= 0.0f && vin.y >= 0.0f && vin.z >= 0.0f );
+	Assert(s_bMathlibInitialized);
+	Assert(vin.x >= 0.0f && vin.y >= 0.0f && vin.z >= 0.0f);

 	// work out which of the channels is the largest ( we will use that to map the exponent )
 	// this is a sluggish branch-based decision tree -- most architectures will offer a [max]
 	// assembly opcode to do this faster.
-	const float *pMax;
+	const float* pMax;
 	if (vin.x > vin.y)
 	{
 		if (vin.x > vin.z)
@ -594,7 +594,7 @@ void VectorToColorRGBExp32( const Vector3D& vin, ColorRGBExp32 &c )
 	}

 	// now work out the exponent for this luxel. 
-	signed int exponent = VectorToColorRGBExp32_CalcExponent( pMax );
+	signed int exponent = VectorToColorRGBExp32_CalcExponent(pMax);

 	// make sure the exponent fits into a signed byte.
 	// (in single precision format this is assured because it was a signed byte to begin with)
@ -604,20 +604,20 @@ void VectorToColorRGBExp32( const Vector3D& vin, ColorRGBExp32 &c )
 	float scalar;
 	{
 		unsigned int fbits = (127 - exponent) << 23;
-		scalar = *reinterpret_cast<float *>(&fbits);
+		scalar = *reinterpret_cast<float*>(&fbits);
 	}

-	// We can totally wind up above 255 and that's okay--but above 256 would be right out.
-	Assert(vin.x * scalar < 256.0f && 
-		   vin.y * scalar < 256.0f && 
-		   vin.z * scalar < 256.0f);
+	// we should never need to clamp:
+	Assert(vin.x * scalar <= 255.0f &&
+		vin.y * scalar <= 255.0f &&
+		vin.z * scalar <= 255.0f);

 	// This awful construction is necessary to prevent VC2005 from using the 
 	// fldcw/fnstcw control words around every float-to-unsigned-char operation.
 	{
-		int red = (vin.x * scalar);
-		int green = (vin.y * scalar);
-		int blue = (vin.z * scalar);
+		int red = (int)(vin.x * scalar);
+		int green = (int)(vin.y * scalar);
+		int blue = (int)(vin.z * scalar);

 		c.r = red;
 		c.g = green;
@ -629,7 +629,7 @@ void VectorToColorRGBExp32( const Vector3D& vin, ColorRGBExp32 &c )
 	c.b = ( unsigned char )(vin.z * scalar);
 	*/

-	c.exponent = ( signed char )exponent;
+	c.exponent = (signed char)exponent;
 }

-#endif
+#endif
--- a/r5dev/mathlib/fltx4.h
+++ b/r5dev/mathlib/fltx4.h
@ -0,0 +1,107 @@
+//===== Copyright 1996-2010, Valve Corporation, All rights reserved. ======//
+//
+// Purpose: - defines the type fltx4 - Avoid cyclic inclusion.
+//
+//===========================================================================//
+
+#ifndef FLTX4_H
+#define FLTX4_H
+
+#if defined(GNUC)
+#define USE_STDC_FOR_SIMD 0
+#else
+#define USE_STDC_FOR_SIMD 0
+#endif
+
+#if (!defined(PLATFORM_PPC) && (USE_STDC_FOR_SIMD == 0))
+#define _SSE1 1
+#endif
+
+// I thought about defining a class/union for the SIMD packed floats instead of using fltx4,
+// but decided against it because (a) the nature of SIMD code which includes comparisons is to blur
+// the relationship between packed floats and packed integer types and (b) not sure that the
+// compiler would handle generating good code for the intrinsics.
+
+#if USE_STDC_FOR_SIMD
+#error "hello"
+typedef union
+{
+	float  m128_f32[4];
+	uint32 m128_u32[4];
+} fltx4;
+
+typedef fltx4 i32x4;
+typedef fltx4 u32x4;
+
+#ifdef _PS3
+typedef fltx4 u32x4;
+typedef fltx4 i32x4;
+#endif
+typedef fltx4 bi32x4;
+
+#elif ( defined( _PS3 ) )
+
+typedef union
+{
+	// This union allows float/int access (which generally shouldn't be done in inner loops)
+
+	vec_float4	vmxf;
+	vec_int4	vmxi;
+	vec_uint4	vmxui;
+#if defined(__SPU__)
+	vec_uint4	vmxbi;
+#else
+	__vector bool vmxbi;
+#endif
+
+	struct
+	{
+		float x;
+		float y;
+		float z;
+		float w;
+	};
+
+	float		m128_f32[4];
+	uint32		m128_u32[4];
+	int32		m128_i32[4];
+
+} fltx4_union;
+
+typedef vec_float4 fltx4;
+typedef vec_uint4  u32x4;
+typedef vec_int4   i32x4;
+
+#if defined(__SPU__)
+typedef vec_uint4 bi32x4;
+#else
+typedef __vector bool bi32x4;
+#endif
+
+#define DIFFERENT_NATIVE_VECTOR_TYPES // true if the compiler has different types for float4, uint4, int4, etc
+
+#elif ( defined( _X360 ) )
+
+typedef union
+{
+	// This union allows float/int access (which generally shouldn't be done in inner loops)
+	__vector4	vmx;
+	float		m128_f32[4];
+	uint32		m128_u32[4];
+} fltx4_union;
+
+typedef __vector4 fltx4;
+typedef __vector4 i32x4; // a VMX register; just a way of making it explicit that we're doing integer ops.
+typedef __vector4 u32x4; // a VMX register; just a way of making it explicit that we're doing unsigned integer ops.
+typedef fltx4 bi32x4;
+#else
+
+typedef __m128 fltx4;
+typedef __m128 i32x4;
+typedef __m128 u32x4;
+typedef __m128i shortx8;
+typedef fltx4 bi32x4;
+
+#endif
+
+#endif
--- a/r5dev/mathlib/math_pfns.h
+++ b/r5dev/mathlib/math_pfns.h
@ -9,11 +9,36 @@

 #include <limits>

+// YUP_ACTIVE is from Source2. It's (obviously) not supported on this branch, just including it here to help merge camera.cpp/.h and the CSM shadow code.
+//#define YUP_ACTIVE 1
+
+enum MatrixAxisType_t
+{
+#ifdef YUP_ACTIVE
+	FORWARD_AXIS = 2,
+	LEFT_AXIS = 0,
+	UP_AXIS = 1,
+#else
+	FORWARD_AXIS = 0,
+	LEFT_AXIS = 1,
+	UP_AXIS = 2,
+#endif
+
+	X_AXIS = 0,
+	Y_AXIS = 1,
+	Z_AXIS = 2,
+	ORIGIN = 3,
+	PROJECTIVE = 3,
+};
+
 #if defined( _X360 )
 #include <xboxmath.h>
 #elif defined(_PS3)

-#ifndef SPU
+#ifdef SPU
+#include <vectormath/c/vectormath_aos.h>
+#include <spu_intrinsics.h>
+#else
 #include <ppu_asm_intrinsics.h>
 #endif

@ -53,17 +78,19 @@

 #include <xmmintrin.h>

+
+
 // These globals are initialized by mathlib and redirected based on available fpu features

 // The following are not declared as macros because they are often used in limiting situations,
 // and sometimes the compiler simply refuses to inline them for some reason
-FORCEINLINE float FastSqrt(float x)
+FORCEINLINE float VECTORCALL FastSqrt(float x)
 {
 	__m128 root = _mm_sqrt_ss(_mm_load_ss(&x));
 	return *(reinterpret_cast<float*>(&root));
 }

-FORCEINLINE float FastRSqrtFast(float x)
+FORCEINLINE float VECTORCALL FastRSqrtFast(float x)
 {
 	// use intrinsics
 	__m128 rroot = _mm_rsqrt_ss(_mm_load_ss(&x));
@ -72,7 +99,7 @@ FORCEINLINE float FastRSqrtFast(float x)
 // Single iteration NewtonRaphson reciprocal square root:
 // 0.5 * rsqrtps * (3 - x * rsqrtps(x) * rsqrtps(x)) 	
 // Very low error, and fine to use in place of 1.f / sqrtf(x).	
-FORCEINLINE float FastRSqrt(float x)
+FORCEINLINE float VECTORCALL FastRSqrt(float x)
 {
 	float rroot = FastRSqrtFast(x);
 	return (0.5f * rroot) * (3.f - (x * rroot) * rroot);
@ -136,6 +163,7 @@ inline double FastSqrtEst(double x) { return __frsqrte(x) * x; }

 #endif // !defined( PLATFORM_PPC ) && !defined(_SPU)

+
 // if x is infinite, return FLT_MAX
 inline float FastClampInfinity(float x)
 {
@ -146,7 +174,19 @@ inline float FastClampInfinity(float x)
 #endif
 }

-#if defined (_PS3) && !defined(SPU)
+#if defined (_PS3) 
+
+#if defined(__SPU__)
+
+inline int _rotl(int a, int count)
+{
+	vector signed int vi;
+	vi = spu_promote(a, 0);
+	vi = spu_rl(vi, count);
+	return spu_extract(vi, 0);
+}
+
+#else

 // extern float cosvf(float);      /* single precision cosine      */
 // extern float sinvf(float);      /* single precision sine        */
@ -164,63 +204,6 @@ inline int64 _rotl64(int64 x, int c)
 	return __rldicl(x, c, 0);
 }

-//-----------------------------------------------------------------
-// Vector Unions
-//-----------------------------------------------------------------
-
-//-----------------------------------------------------------------
-// Floats
-//-----------------------------------------------------------------
-typedef union
-{
-	vector float vf;
-	float f[4];
-} vector_float_union;
-
-//-----------------------------------------------------------------
-// Ints
-//-----------------------------------------------------------------
-typedef union
-{
-	vector int vi;
-	int i[4];
-} vector_int4_union;
-
-typedef union
-{
-	vector unsigned int vui;
-	unsigned int ui[4];
-} vector_uint4_union;
-
-//-----------------------------------------------------------------
-// Shorts
-//-----------------------------------------------------------------
-typedef union
-{
-	vector signed short vs;
-	signed short s[8];
-} vector_short8_union;
-
-typedef union
-{
-	vector unsigned short vus;
-	unsigned short us[8];
-} vector_ushort8_union;
-
-//-----------------------------------------------------------------
-// Chars
-//-----------------------------------------------------------------
-typedef union
-{
-	vector signed char vc;
-	signed char c[16];
-} vector_char16_union;
-
-typedef union
-{
-	vector unsigned char vuc;
-	unsigned char uc[16];
-} vector_uchar16_union;

 /*
 FORCEINLINE float _VMX_Sqrt( float x )
@ -277,6 +260,95 @@ FORCEINLINE float _VMX_Cos(float a)
 #define FastSinCos(x,s,c)	_VMX_SinCos(x,s,c)
 #define FastCos(x)			_VMX_Cos(x)
 */
+
+#endif
+
+
+#if defined(__SPU__)
+
+// do we need these optimized yet?
+
+FORCEINLINE float FastSqrt(float x)
+{
+	return sqrtf(x);
+}
+
+FORCEINLINE float FastRSqrt(float x)
+{
+	float rroot = 1.f / (sqrtf(x) + FLT_EPSILON);
+	return rroot;
+}
+
+
+#define FastRSqrtFast(x)	FastRSqrt(x)
+
+
+#endif
+
+
+
+//-----------------------------------------------------------------
+// Vector Unions
+//-----------------------------------------------------------------
+
+//-----------------------------------------------------------------
+// Floats
+//-----------------------------------------------------------------
+typedef union
+{
+	vector float vf;
+	float f[4];
+} vector_float_union;
+
+#if !defined(__SPU__)
+//-----------------------------------------------------------------
+// Ints
+//-----------------------------------------------------------------
+typedef union
+{
+	vector int vi;
+	int i[4];
+} vector_int4_union;
+
+typedef union
+{
+	vector unsigned int vui;
+	unsigned int ui[4];
+} vector_uint4_union;
+
+//-----------------------------------------------------------------
+// Shorts
+//-----------------------------------------------------------------
+typedef union
+{
+	vector signed short vs;
+	signed short s[8];
+} vector_short8_union;
+
+typedef union
+{
+	vector unsigned short vus;
+	unsigned short us[8];
+} vector_ushort8_union;
+
+//-----------------------------------------------------------------
+// Chars
+//-----------------------------------------------------------------
+typedef union
+{
+	vector signed char vc;
+	signed char c[16];
+} vector_char16_union;
+
+typedef union
+{
+	vector unsigned char vuc;
+	unsigned char uc[16];
+} vector_uchar16_union;
+#endif
+
+
+
 #endif	// _PS3
 #endif	// #ifndef SPU

--- a/r5dev/mathlib/mathlib.h
+++ b/r5dev/mathlib/mathlib.h
--- a/r5dev/mathlib/mathlib_base.cpp
+++ b/r5dev/mathlib/mathlib_base.cpp
--- a/r5dev/mathlib/noisedata.h
+++ b/r5dev/mathlib/noisedata.h
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//====== Copyright © 1996-2006, Valve Corporation, All rights reserved. =======//
 //
 // Purpose: static data for noise() primitives.
 //
@ -178,3 +178,132 @@ static float impulse_zcoords[] = {
    0.796078,0.615686,0.878431,0.921569,0.631373,0.200000,0.403922,0.462745
 };

+static float s_randomGradients[] = {
+    -0.460087, -0.887463, -0.058594 ,-0.458151, 0.861646, -0.430176 ,
+    -0.930437, 0.316048, -0.195496 ,-0.883558, -0.393287, -0.276550 ,
+    0.171025, -0.983455, -0.329712 ,-0.033573, -0.941867, -0.994995 ,
+    -0.476492, 0.014764, 0.879150 ,0.834786, -0.454571, 0.348755 ,-0.585801,
+     -0.782531, -0.338745 ,0.973990, -0.023774, 0.225403 ,-0.989659,
+     -0.011313, -0.143005 ,0.507109, -0.838016, -0.369141 ,-0.609995,
+     -0.766277, 0.314087 ,0.429987, 0.599850, -0.843323 ,0.089587,
+     -0.904071, -0.977783 ,-0.306997, -0.901432, 0.705078 ,0.031606,
+     0.994782, -0.950806 ,0.797663, -0.161508, -0.588806 ,0.811569,
+     -0.505360, 0.339783 ,0.936130, -0.114223, 0.334778 ,0.217280,
+     -0.970264, 0.440674 ,0.600976, -0.712375, -0.516418 ,0.197935,
+     0.979260, 0.213501 ,0.002956, 0.999995, -0.268127 ,-0.912763, 0.084651,
+     -0.401062 ,-0.193271, -0.945607, -0.804382 ,0.662480, 0.640156,
+     -0.506348 ,0.363459, -0.884439, 0.627197 ,-0.433415, 0.685363,
+     0.803589 ,-0.721652, 0.416952, -0.607971 ,0.647676, 0.296700,
+     0.734863 ,0.723040, -0.444294, 0.590454 ,-0.716318, -0.420435,
+     -0.613770 ,-0.039076, -0.996459, 0.885437 ,0.175225, -0.969092,
+     0.703918 ,0.116952, -0.991832, -0.399048 ,-0.504674, -0.013997,
+     0.863281 ,-0.436364, -0.817916, 0.651733 ,0.098030, -0.995090,
+     0.137573 ,0.637157, -0.766031, -0.132263 ,-0.594718, 0.583153,
+     -0.681213 ,-0.625632, 0.419913, -0.724426 ,-0.607341, -0.394521,
+     0.750427 ,-0.312161, 0.698925, 0.899719 ,0.101228, -0.927363,
+     -0.962708 ,-0.934241, 0.041214, -0.354553 ,-0.826005, -0.284775,
+     -0.507446 ,-0.363751, -0.929287, -0.173584 ,-0.141266, 0.983869,
+     -0.613525 ,-0.436139, -0.074329, 0.899292 ,-0.875355, -0.480839,
+     0.057556 ,0.250714, 0.071270, 0.967896 ,0.182131, 0.811467, 0.950195 ,
+    -0.687696, -0.668570, -0.380554 ,0.785175, -0.540171, -0.359863 ,
+    0.399774, 0.848526, 0.655151 ,-0.412243, -0.004602, 0.911072 ,-0.132187,
+     -0.990485, 0.278198 ,0.212421, 0.764179, 0.944214 ,-0.694878, 0.234042,
+     -0.699402 ,0.404273, 0.904644, -0.316406 ,0.358393, 0.087135,
+     0.933044 ,-0.473398, 0.820774, -0.559692 ,0.044667, -0.997938,
+     0.718201 ,0.603896, -0.046386, 0.796570 ,-0.968822, 0.180966,
+     0.172058 ,-0.458206, 0.886932, -0.126221 ,-0.656709, -0.410319,
+     0.693848 ,0.999495, -0.018023, 0.026184 ,-0.486069, -0.740178,
+     -0.690979 ,0.942399, -0.333819, 0.022461 ,-0.294545, 0.867619,
+     0.805664 ,0.886791, -0.416081, -0.221252 ,-0.797187, 0.587661,
+     -0.171021 ,-0.617708, -0.762817, -0.295654 ,0.449351, -0.853660,
+     -0.505615 ,0.065153, -0.995535, 0.723572 ,0.996518, 0.000000,
+     0.083374 ,0.263346, 0.088663, -0.964417 ,-0.221316, -0.970864,
+     0.383423 ,-0.512560, 0.718804, 0.675598 ,0.588859, 0.406293,
+     -0.764648 ,-0.803841, -0.592769, -0.061646 ,0.860199, 0.492898,
+     -0.150330 ,-0.351871, 0.858024, 0.728455 ,0.515724, -0.815149,
+     0.455322 ,-0.122322, -0.960484, 0.898254 ,-0.529020, 0.844443,
+     -0.156799 ,0.530671, -0.725304, 0.637024 ,-0.748915, -0.248928,
+     -0.634094 ,-0.188099, 0.584087, 0.972778 ,0.974165, 0.222094,
+     -0.041992 ,0.595326, -0.701663, -0.549438 ,-0.060279, -0.998047,
+     -0.262451 ,-0.191682, -0.782292, -0.951477 ,0.528851, -0.596315,
+     0.752319 ,0.612134, 0.639567, -0.604919 ,0.882803, 0.200541, 0.433594 ,
+    -0.936278, -0.039490, 0.349304 ,0.940848, -0.121649, 0.318604 ,
+    -0.115022, 0.048685, -0.993347 ,-0.324162, -0.935726, -0.394226 ,
+    -0.937457, -0.294685, 0.193909 ,0.894463, -0.437237, 0.104065 ,
+    -0.861852, -0.165102, -0.486206 ,-0.980480, -0.139899, 0.139526 ,
+    -0.024496, 0.960750, -0.996094 ,-0.699760, 0.714256, -0.018860 ,
+    0.538575, -0.792107, 0.470581 ,0.309926, -0.943720, 0.349182 ,0.525671,
+     -0.772280, 0.561523 ,-0.793079, 0.268745, 0.567505 ,0.697504,
+     -0.421131, 0.639221 ,-0.737871, 0.672553, -0.076660 ,-0.390769,
+     -0.894942, -0.482666 ,-0.593469, 0.191892, 0.796448 ,0.439379,
+     -0.896646, 0.123108 ,0.337698, -0.703709, -0.879822 ,-0.654687,
+     0.749517, 0.148071 ,-0.482070, -0.700569, 0.737305 ,0.626971, 0.761948,
+     -0.250610 ,0.616585, 0.015339, -0.787231 ,-0.175877, -0.982000,
+     0.364624 ,0.891483, -0.324585, -0.334167 ,0.858029, 0.438272,
+     -0.297913 ,0.949369, 0.258757, 0.184448 ,0.105948, -0.901183,
+     0.969666 ,-0.261581, 0.943276, -0.615845 ,-0.682063, -0.528339,
+     -0.595520 ,-0.810856, 0.514103, -0.326050 ,-0.163757, 0.986118,
+     0.165527 ,-0.595927, -0.221907, 0.791504 ,-0.160374, -0.977354,
+     0.652405 ,-0.428837, 0.641628, -0.829102 ,-0.634149, -0.486378,
+     -0.687927 ,-0.093271, -0.995222, -0.295654 ,0.988659, -0.150144,
+     -0.003357 ,0.730821, -0.497396, -0.538818 ,-0.781913, -0.621260,
+     -0.065674 ,-0.655884, -0.753313, -0.073486 ,0.845542, -0.409094,
+     0.375977 ,-0.630041, -0.514925, -0.678101 ,0.205571, 0.978634,
+     -0.019531 ,0.582841, 0.763684, -0.430054 ,0.685084, -0.728464,
+     0.000000 ,-0.241437, -0.958430, -0.532898 ,0.741884, 0.020899,
+     -0.670349 ,0.740273, -0.318412, 0.624634 ,-0.738068, -0.539041,
+     0.481812 ,-0.965798, -0.034508, -0.257141 ,0.495184, 0.805372,
+     0.549683 ,-0.572524, 0.809558, -0.221008 ,-0.537181, 0.834652,
+     0.220825 ,-0.899741, 0.097826, -0.427368 ,-0.370148, 0.494066,
+     0.904846 ,0.711387, 0.577688, 0.490356 ,0.183324, -0.722791,
+     -0.964172 ,0.552815, -0.807753, -0.347351 ,-0.096050, 0.994565,
+     -0.386047 ,-0.884907, 0.369536, 0.305115 ,-0.832976, -0.551898,
+     0.047363 ,0.338883, 0.641922, 0.897034 ,0.805354, 0.506187, 0.357727 ,
+    -0.040128, 0.998805, -0.570923 ,0.466918, -0.602455, 0.811035 ,0.139166,
+     -0.983697, 0.633362 ,-0.253765, -0.340498, -0.962891 ,-0.448806,
+     0.843929, 0.547791 ,-0.859087, -0.434649, -0.300110 ,0.287570,
+     0.957661, 0.047729 ,0.379100, 0.795023, 0.780640 ,0.154245, -0.987903,
+     -0.103088 ,-0.538067, 0.794791, -0.462524 ,-0.466455, -0.180966,
+     0.880371 ,-0.175736, -0.983766, 0.202576 ,-0.891655, 0.192080,
+     -0.417725 ,-0.688716, -0.619004, 0.480652 ,0.120790, -0.987844,
+     -0.629456 ,-0.075080, 0.983385, 0.910461 ,0.147032, -0.960431,
+     -0.849304 ,0.732309, 0.671559, 0.152283 ,0.804657, 0.273913,
+     -0.547729 ,0.391462, -0.913976, 0.263184 ,-0.567300, 0.783128,
+     0.409607 ,0.214917, 0.167182, -0.975952 ,0.367428, -0.789995,
+     -0.800537 ,-0.320112, 0.912727, -0.621399 ,0.659247, -0.647346,
+     -0.501892 ,0.222842, -0.696452, -0.950562 ,-0.697513, -0.576278,
+     0.521118 ,0.602260, -0.756081, 0.391418 ,-0.116043, 0.992942,
+     0.206665 ,0.220693, -0.968855, -0.453552 ,0.737991, 0.670137,
+     0.106812 ,0.198419, -0.696590, 0.960999 ,-0.391866, -0.883543,
+     0.547668 ,0.082067, -0.996213, 0.330200 ,-0.806059, 0.491897,
+     -0.377991 ,-0.992265, 0.120698, 0.029236 ,0.406622, -0.867524,
+     0.575928 ,0.789945, 0.608406, 0.096191 ,-0.531904, -0.004218,
+     -0.846802 ,0.558298, -0.089427, 0.828125 ,-0.783155, 0.363828,
+     -0.541382 ,0.981706, -0.183228, 0.052673 ,-0.388642, 0.920618,
+     -0.096497 ,-0.506403, -0.044662, -0.862000 ,-0.512421, -0.852059,
+     -0.204163 ,0.559542, 0.339777, 0.803772 ,0.527502, -0.846389,
+     0.137573 ,-0.184315, -0.952725, 0.794983 ,0.125024, -0.977110,
+     -0.809082 ,-0.643507, 0.678632, 0.482056 ,-0.277474, 0.954056,
+     0.377380 ,-0.622333, -0.717603, 0.448914 ,0.366846, -0.110794,
+     -0.929382 ,0.120402, 0.992596, 0.131653 ,-0.982921, 0.103550,
+     -0.152954 ,-0.058333, -0.997913, -0.428894 ,0.132631, 0.979299,
+     0.755432 ,0.326398, 0.937806, 0.340637 ,0.211720, 0.976659, 0.168640 ,
+    0.957557, -0.019174, -0.287659 ,-0.016554, 0.999650, 0.780090 ,
+    -0.271222, 0.827292, -0.875732 ,0.850790, -0.448069, 0.307129 ,0.115949,
+     0.600003, -0.989441 ,0.285877, -0.940896, -0.536255 ,-0.321317,
+     -0.278336, -0.942383 ,-0.422133, 0.754447, 0.765747 ,0.669674,
+     -0.741852, -0.051514 ,0.213604, -0.949888, 0.730103 ,0.619681,
+     -0.751798, -0.341797 ,-0.223762, 0.438616, -0.968506 ,-0.302925,
+     -0.945732, 0.361877 ,0.121093, -0.977151, -0.821838 ,0.127125,
+     0.758710, -0.980774 ,0.691682, 0.695626, 0.270203 ,0.241114, 0.967463,
+     -0.303040 ,-0.829705, 0.422869, 0.402100 ,-0.484170, -0.741723,
+     0.692017 ,-0.431259, -0.777492, -0.727844 ,0.835756, -0.211986,
+     0.518311 ,0.297724, 0.932993, 0.561829 ,0.633475, -0.764920,
+     -0.181091 ,-0.833849, -0.453546, -0.353027 ,-0.369433, 0.839581,
+     -0.733154 ,0.555847, 0.392934, -0.796631 ,-0.856065, 0.028375,
+     0.516296 ,0.067161, 0.997565, 0.269409 ,-0.962279, -0.051749,
+     0.267456 ,-0.738893, 0.080065, -0.671204 ,-0.764325, 0.462240,
+     0.507019 ,0.148758, 0.751545, 0.974243 ,-0.153430, -0.318230,
+     0.986816 ,-0.439372, 0.776405, 0.716919
+};
+
--- a/r5dev/mathlib/powsse.cpp
+++ b/r5dev/mathlib/powsse.cpp
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright © 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -45,6 +45,7 @@ fltx4 Pow_FixedPoint_Exponent_SIMD(const fltx4& x, int exponent)



+#ifndef _PS3 // these aren't fast (or correct) on the PS3
 /*
 * (c) Ian Stephenson
 *
@ -94,4 +95,7 @@ float FastPow10(float i)
 {
 	return FastPow2(i * 3.321928f);
 }
+#else
+#pragma message("TODO: revisit fast logs on all PPC hardware")
+#endif

--- a/r5dev/mathlib/randsse.cpp
+++ b/r5dev/mathlib/randsse.cpp
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright © 1996-2006, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: generates 4 randum numbers in the range 0..1 quickly, using SIMD
 //
@ -6,7 +6,7 @@

 #include "core/stdafx.h"
 #include "tier0/dbg.h"
-#include "tier0/basetypes.h"
+#include "tier0/threadtools.h"
 #include "mathlib/mathlib.h"
 #include "mathlib/vector.h"
 #include "mathlib/ssemath.h"
@ -43,7 +43,7 @@ public:
 		fltx4 retval = AddSIMD(*m_pRand_K, *m_pRand_J);

 		// if ( ret>=1.0) ret-=1.0
-		fltx4 overflow_mask = CmpGeSIMD(retval, Four_Ones);
+		bi32x4 overflow_mask = CmpGeSIMD(retval, Four_Ones);
 		retval = SubSIMD(retval, AndSIMD(Four_Ones, overflow_mask));

 		*m_pRand_K = retval;
@ -86,6 +86,7 @@ int GetSIMDRandContext(void)
 				// try to take it!
 				if (ThreadInterlockedAssignIf(&(s_nRandContextsInUse[i]), 1, 0))
 				{
+					ThreadMemoryBarrier();
 					return i;								// done!
 				}
 			}
@ -97,6 +98,7 @@ int GetSIMDRandContext(void)

 void ReleaseSIMDRandContext(int nContext)
 {
+	ThreadMemoryBarrier();
 	s_nRandContextsInUse[nContext] = 0;
 }

--- a/r5dev/mathlib/sseconst.cpp
+++ b/r5dev/mathlib/sseconst.cpp
@ -1,13 +1,27 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//===== Copyright © 1996-2005, Valve Corporation, All rights reserved. ======//
 //
 // Purpose: 
 //
 //===========================================================================//

+#if defined(__SPU__)
+#include "platform.h"
+#include "basetypes.h"
+#include "mathlib/mathlib.h"
+#include "mathlib/math_pfns.h"
+// #include "mathlib/fltx4.h"
+#include "ps3/spu_job_shared.h"
+#endif
+
 #include "core/stdafx.h"
 #include "mathlib/ssemath.h"
 #include "mathlib/ssequaternion.h"
+//#include "mathlib/compressed_vector.h"

+// NOTE: This has to be the last file included!
+//#include "tier0/memdbgon.h"
+
+#if !defined(__SPU__)
 const fltx4 Four_PointFives = { 0.5,0.5,0.5,0.5 };
 #ifndef _X360
 const fltx4 Four_Zeros = { 0.0,0.0,0.0,0.0 };
@ -23,14 +37,27 @@ const fltx4 Four_2ToThe21s = { (float)(1 << 21), (float)(1 << 21), (float)(1 <<
 const fltx4 Four_2ToThe22s = { (float)(1 << 22), (float)(1 << 22), (float)(1 << 22), (float)(1 << 22) };
 const fltx4 Four_2ToThe23s = { (float)(1 << 23), (float)(1 << 23), (float)(1 << 23), (float)(1 << 23) };
 const fltx4 Four_2ToThe24s = { (float)(1 << 24), (float)(1 << 24), (float)(1 << 24), (float)(1 << 24) };
-
+const fltx4 Four_Thirds = { 0.33333333, 0.33333333, 0.33333333, 0.33333333 };
+const fltx4 Four_TwoThirds = { 0.66666666, 0.66666666, 0.66666666, 0.66666666 };
 const fltx4 Four_Point225s = { .225, .225, .225, .225 };
 const fltx4 Four_Epsilons = { FLT_EPSILON,FLT_EPSILON,FLT_EPSILON,FLT_EPSILON };
+const fltx4 Four_DegToRad = { ((float)(M_PI_F / 180.f)), ((float)(M_PI_F / 180.f)), ((float)(M_PI_F / 180.f)), ((float)(M_PI_F / 180.f)) };

 const fltx4 Four_FLT_MAX = { FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX };
 const fltx4 Four_Negative_FLT_MAX = { -FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX };
 const fltx4 g_SIMD_0123 = { 0., 1., 2., 3. };

+const fltx4 Four_LinearToGammaCoefficients_A = { -3.7295, -3.7295, -3.7295, -3.7295 };
+const fltx4 Four_LinearToGammaCoefficients_B = { 8.9635,  8.9635,  8.9635,  8.9635 };
+const fltx4 Four_LinearToGammaCoefficients_C = { -7.7397,  -7.7397,  -7.7397,  -7.7397 };
+const fltx4 Four_LinearToGammaCoefficients_D = { 3.443, 3.443, 3.443, 3.443 };
+const fltx4 Four_LinearToGammaCoefficients_E = { 0.048, 0.048, 0.048, 0.048 };
+
+const fltx4 Four_GammaToLinearCoefficients_A = { .1731, .1731, .1731, .1731 };
+const fltx4 Four_GammaToLinearCoefficients_B = { .8717, .8717, .8717, .8717 };
+const fltx4 Four_GammaToLinearCoefficients_C = { -.0452, -.0452, -.0452, -.0452 };
+const fltx4 Four_GammaToLinearCoefficients_D = { .0012, .0012, .0012, .0012 };
+
 const fltx4 g_QuatMultRowSign[4] =
 {
 	{  1.0f,  1.0f, -1.0f, 1.0f },
@ -38,20 +65,28 @@ const fltx4 g_QuatMultRowSign[4] =
 	{  1.0f, -1.0f,  1.0f, 1.0f },
 	{ -1.0f, -1.0f, -1.0f, 1.0f }
 };
+#endif

-const uint32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = { 0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff };
-const uint32 ALIGN16 g_SIMD_signmask[4] ALIGN16_POST = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
-const uint32 ALIGN16 g_SIMD_lsbmask[4] ALIGN16_POST = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe };
-const uint32 ALIGN16 g_SIMD_clear_wmask[4] ALIGN16_POST = { 0xffffffff, 0xffffffff, 0xffffffff, 0 };
-const uint32 ALIGN16 g_SIMD_AllOnesMask[4] ALIGN16_POST = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; // ~0,~0,~0,~0
-const uint32 ALIGN16 g_SIMD_Low16BitsMask[4] ALIGN16_POST = { 0xffff, 0xffff, 0xffff, 0xffff }; // 0xffff x 4

-const uint32 ALIGN16 g_SIMD_ComponentMask[4][4] ALIGN16_POST =
+const int32 ALIGN16 g_SIMD_clear_signmask[4] ALIGN16_POST = { 0x7fffffff,0x7fffffff,0x7fffffff,0x7fffffff };
+const int32 ALIGN16 g_SIMD_signmask[4] ALIGN16_POST = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+const int32 ALIGN16 g_SIMD_lsbmask[4] ALIGN16_POST = { 0xfffffffe, 0xfffffffe, 0xfffffffe, 0xfffffffe };
+const int32 ALIGN16 g_SIMD_clear_wmask[4] ALIGN16_POST = { 0xffffffff, 0xffffffff, 0xffffffff, 0 };
+const int32 ALIGN16 g_SIMD_AllOnesMask[4] ALIGN16_POST = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }; // ~0,~0,~0,~0
+const int32 ALIGN16 g_SIMD_Low16BitsMask[4] ALIGN16_POST = { 0xffff, 0xffff, 0xffff, 0xffff }; // 0xffff x 4
+
+
+const int32 ALIGN16 g_SIMD_ComponentMask[4][4] ALIGN16_POST =
 {
 	{ 0xFFFFFFFF, 0, 0, 0 }, { 0, 0xFFFFFFFF, 0, 0 }, { 0, 0, 0xFFFFFFFF, 0 }, { 0, 0, 0, 0xFFFFFFFF }
 };

-const uint32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
+const fltx4 g_SIMD_Identity[4] =
+{
+	{ 1.0, 0, 0, 0 }, { 0, 1.0, 0, 0 }, { 0, 0, 1.0, 0 }, { 0, 0, 0, 1.0 }
+};
+
+const int32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
 {
 	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
 	{ 0xffffffff, 0x00000000, 0x00000000, 0x00000000 },
@ -59,6 +94,114 @@ const uint32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
 	{ 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000 },
 };

+// Mask with all bits set in elements 1 and 3 and clear in elements 0 and 2.
+// ALIGN16_POST is paired with ALIGN16 here exactly as on every other aligned table
+// in this file; presumably it carries the alignment on compilers that take a
+// trailing attribute -- confirm against the platform headers.
+const int32 ALIGN16 g_SIMD_EveryOtherMask[4] ALIGN16_POST = { 0, ~0, 0, ~0 };
+
+
+
+#ifdef PLATFORM_PPC
+
+/// Passed as a parameter to vslh, shuffles the z component of a quat48 stored in the zw words left by one bit.
+const uint16 ALIGN16 g_SIMD_Quat48_Unpack_Shift[] = {
+	0x00, 0x00,												// x word
+	0x00, 0x00,												// y word
+	0x00, 0x01,												// z word 
+	0x00, 0x00 };											// w word 
+
+// this permutes uint16's x,y,z packed in the most significant four halfwords of a fltx4 
+// so that each gets its own word in the output. expected use is // __vperm( XX, Four_Threes, permute )
+// -- that way each int is represented as 3.0 + n * 2^-22 , which we can pull into the 
+// appropriate range with a single madd!
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute0[16] =
+{
+	16, 17, 0, 1,											// word one:   00XX
+	16, 17, 2, 3,											// word two:   00YY
+	16, 17, 4, 5,											// word three: 00ZZ
+	16, 17, 6, 7											// word four:  00WW
+};
+
+// the other permutes are a little trickier. note: I'm defining them out of order.
+// 2 and 5 blend together prior results, rather than a source with 3.0f
+
+// out1 = __vperm( x0y0z0x1y1z1x2y2, Four_Threes, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute1) ); // __x1__y1__z1____
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute1[16] =
+{
+	16, 17, 6, 7,											// word one:   00XX
+	16, 17, 8, 9,											// word two:   00YY
+	16, 17, 10, 11,											// word three: 00ZZ
+	16, 17, 12, 13											// word four:  00WW
+};
+
+// out3 = __vperm( z2x3y3z3x4y4z4x5, Four_Threes, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute3) ); // __x3__y3__z3__z2  // z2 is important, goes into out2
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute3[16] =
+{
+	16, 17, 2, 3,
+	16, 17, 4, 5,
+	16, 17, 6, 7,
+	16, 17, 0, 1
+};
+
+// out4 = __vperm( z2x3y3z3x4y4z4x5, Four_Threes, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute4) ); // __x4__y4__z4__x5  // x5 is important, goes into out5
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute4[16] =
+{
+	16, 17, 8, 9,
+	16, 17, 10, 11,
+	16, 17, 12, 13,
+	16, 17, 14, 15
+};
+
+// out6 = __vperm( y5z5x6y6z6x7y7z7, Four_Threes, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute6) ); // __x6__y6__z6____
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute6[16] =
+{
+	16, 17, 4, 5,    // word one
+	16, 17, 6, 7,  // word two
+	16, 17, 8, 9,  // word three
+	16, 17, 10, 11   // word four  (garbage)
+};
+
+// out7 = __vperm( y5z5x6y6z6x7y7z7, Four_Threes, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute7) ); // __x7__y7__z7____
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute7[16] =
+{
+	16, 17, 10, 11,    // word one
+	16, 17, 12, 13,  // word two
+	16, 17, 14, 15,  // word three
+	16, 17, 16, 17   // word four  (garbage)
+};
+
+// these last two are tricky because we mix old output with source input. we get the 3.0f
+// from the old output.
+// out2 = __vperm( x0y0z0x1y1z1x2y2, out3, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute2)  ); // __x2__y2__z2____
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute2[16] =
+{
+	16, 17, 12, 13,  // 3.x2
+	16, 17, 14, 15,  // 3.y2
+	16, 17, 30, 31,  // 3.z2 (z2 is taken from the last halfword of out3, the second permute operand)
+	16, 17, 16, 17   // word four (garbage)
+};
+
+// out5 = __vperm( y5z5x6y6z6x7y7z7, out4, *reinterpret_cast<const fltx4 *>(g_SIMD_Quat48_Unpack_Permute5)  ) // __x5__y5__z5____
+const uint8 ALIGN16 g_SIMD_Quat48_Unpack_Permute5[16] =
+{
+	16, 17, 30, 31,  // 3.x5  (x5 is taken from the last halfword of out4, the second permute operand)
+	16, 17,  0,  1,  // 3.y5
+	16, 17,  2,  3,  // 3.z5 
+	16, 17, 16, 17   // garbage   
+};
+
+
+// magic constants that we use to convert the unpacked q48 components from 3 + n * 2^-22 (where n = 0 .. 65535)
+// to -1.0 .. 1.0
+#define UnpackMul16s ( (1 << 22) / 32767.5 )
+#define UnpackAdd16s ( ( -UnpackMul16s * 3.0 ) - 1 )
+// we put the constants all into one word to save a little memory bandwidth
+// but otherwise it would look like this:
+// static const fltx4 vUpkMul = { UnpackMul16s, UnpackMul16s, UnpackMul16s, UnpackMul16s };
+// static const fltx4 vUpkAdd = { UnpackAdd16s , UnpackAdd16s , UnpackAdd16s , UnpackAdd16s  };
+const fltx4 g_SIMD_Quat48_Unpack_Magic_Constants = { UnpackMul16s , UnpackAdd16s, 0, 0 };
+#undef UnpackMul16s
+#undef UnpackAdd16s
+
+#endif
+

 // FUNCTIONS
 // NOTE: WHY YOU **DO NOT** WANT TO PUT FUNCTIONS HERE
@ -82,7 +225,7 @@ const uint32 ALIGN16 g_SIMD_SkipTailMask[4][4] ALIGN16_POST =
 // function is more than one screen long, yours is probably not one
 // of those occasions.

-
+#if !defined(__SPU__)

 /// You can use this to rotate a long array of FourVectors all by the same
 /// matrix. The first parameter is the head of the array. The second is the
@ -122,7 +265,7 @@ void FourVectors::RotateManyBy(FourVectors* RESTRICT pVectors, unsigned int numV
 		matSplat22 = SplatZSIMD(matCol2);
 	}

-#ifdef _X360
+#if defined(_X360) || defined(_PS3)
 	// Same algorithm as above, but the loop is unrolled to eliminate data hazard latencies
 	// and simplify prefetching. Named variables are deliberately used instead of arrays to
 	// ensure that the variables live on the registers instead of the stack (stack load/store
@ -216,6 +359,172 @@ void FourVectors::RotateManyBy(FourVectors* RESTRICT pVectors, unsigned int numV
 #endif
 }

+// For each of the four lanes, finds the point on the (infinite) line through
+// vLineA and vLineB that is closest to P and stores it in vClosest.
+// When outT is non-null it receives t, where closest = A + (B-A)t; a closest
+// point lying on the segment between A and B has 0 <= t <= 1.
+void FourVectors::CalcClosestPointOnLineSIMD(const FourVectors& P, const FourVectors& vLineA, const FourVectors& vLineB, FourVectors& vClosest, fltx4* outT)
+{
+	FourVectors vLineDir;
+	const fltx4 fl4T = CalcClosestPointToLineTSIMD(P, vLineA, vLineB, vLineDir);
+
+	if (outT)
+	{
+		*outT = fl4T;
+	}
+
+	// vClosest = vLineA + (vLineB - vLineA) * t
+	vClosest = vLineDir;
+	vClosest *= fl4T;
+	vClosest += vLineA;
+}
+
+// Computes, per lane, the parameter t such that vLineA + vDir * t is the point on
+// the (infinite) line through vLineA and vLineB that is closest to P.
+//   vDir   - receives vLineB - vLineA.
+//   return - t for each lane; lanes whose (vDir * vDir) is below 0.00001
+//            (a degenerate, near zero-length line) return 0.
+fltx4 FourVectors::CalcClosestPointToLineTSIMD(const FourVectors& P, const FourVectors& vLineA, const FourVectors& vLineB, FourVectors& vDir)
+{
+	Assert(s_bMathlibInitialized);
+	vDir = vLineB;
+	vDir -= vLineA;
+
+	fltx4 div = vDir * vDir;
+	bi32x4 Mask;
+	fltx4 Compare = ReplicateX4(0.00001f);
+	fltx4 result;
+	Mask = CmpLtSIMD(div, Compare);
+
+	// The division is evaluated for every lane; degenerate lanes are overwritten below.
+	result = DivSIMD(SubSIMD(vDir * P, vDir * vLineA), div);
+
+	// MaskedAssign yields the blended vector as its return value (it does not write
+	// through its arguments), so the result has to be stored; previously it was
+	// dropped and degenerate lanes kept the result of dividing by ~0.
+	result = MaskedAssign(Mask, Four_Zeros, result);
+	return result;
+}
+
+// Rotates numVectors FourVectors from pVectors by the upper-left 3x3 part of
+// rotationMatrix and writes the results to pOut. The input array is only read.
+//   pVectors       - head of the input array.
+//   numVectors     - number of FourVectors to process; must be > 0 (asserted; 0 is a no-op).
+//   rotationMatrix - only the x/y/z entries of each row are used.
+//   pOut           - head of the output array; must hold numVectors entries.
+void FourVectors::RotateManyBy(FourVectors* RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors* RESTRICT pOut)
+{
+	Assert(numVectors > 0);
+	if (numVectors == 0)
+		return;
+
+	// Splat out each of the entries in the matrix to a fltx4. Do this
+	// in the order that we will need them, to hide latency. I'm
+	// avoiding making an array of them, so that they'll remain in 
+	// registers.
+	fltx4 matSplat00, matSplat01, matSplat02,
+		matSplat10, matSplat11, matSplat12,
+		matSplat20, matSplat21, matSplat22;
+
+	{
+		// Load the matrix into local vectors. Sadly, matrix3x4_ts are 
+		// often unaligned. The w component of each loaded vector is the
+		// fourth entry of that matrix row, but we don't really care about that.
+		fltx4 matCol0 = LoadUnalignedSIMD(rotationMatrix[0]);
+		fltx4 matCol1 = LoadUnalignedSIMD(rotationMatrix[1]);
+		fltx4 matCol2 = LoadUnalignedSIMD(rotationMatrix[2]);
+
+		matSplat00 = SplatXSIMD(matCol0);
+		matSplat01 = SplatYSIMD(matCol0);
+		matSplat02 = SplatZSIMD(matCol0);
+
+		matSplat10 = SplatXSIMD(matCol1);
+		matSplat11 = SplatYSIMD(matCol1);
+		matSplat12 = SplatZSIMD(matCol1);
+
+		matSplat20 = SplatXSIMD(matCol2);
+		matSplat21 = SplatYSIMD(matCol2);
+		matSplat22 = SplatZSIMD(matCol2);
+	}
+
+#if  defined(_X360) || defined(_PS3)
+	// Same algorithm as the PC path below, but the loop is unrolled to eliminate data hazard latencies
+	// and simplify prefetching. Named variables are deliberately used instead of arrays to
+	// ensure that the variables live on the registers instead of the stack (stack load/store
+	// is a serious penalty on 360).  Nb: for prefetching to be most efficient here, the
+	// loop should be unrolled to 8 FourVectors per iteration; because each FourVectors is 
+	// 48 bytes long, 48 * 8 = 384, its least common multiple with the 128-byte cache line. 
+	// That way you can fetch the next 3 cache lines while you work on these three. 
+	// If you do go this route, be sure to disassemble and make sure it doesn't spill 
+	// registers to stack as you do this; the cost of that will be excessive. Unroll the loop
+	// a little and just live with the fact that you'll be doing a couple of redundant dbcts
+	// (they don't cost you anything). Be aware that all three cores share L2 and it can only
+	// have eight cache lines fetching at a time.
+	fltx4 outX0, outY0, outZ0; // bank one of outputs
+	fltx4 outX1, outY1, outZ1; // bank two of outputs
+
+
+	// Because of instruction latencies and scheduling, it's actually faster to use adds and muls
+	// rather than madds. (Empirically determined by timing.)
+	// NOTE(review): only the priming code below uses separate adds and muls; the
+	// steady-state loop uses MaddSIMD.
+	const FourVectors* stop = pVectors + numVectors;
+	FourVectors* RESTRICT pVectNext;
+	FourVectors* RESTRICT pOutNext;
+	// prime the pump.
+	if (numVectors & 0x01)
+	{
+		// odd number of vectors to process
+		// prime the 1 group of registers
+		pVectNext = pVectors++;
+		pOutNext = pOut++;
+		outX1 = AddSIMD(AddSIMD(MulSIMD(pVectNext->x, matSplat00), MulSIMD(pVectNext->y, matSplat01)), MulSIMD(pVectNext->z, matSplat02));
+		outY1 = AddSIMD(AddSIMD(MulSIMD(pVectNext->x, matSplat10), MulSIMD(pVectNext->y, matSplat11)), MulSIMD(pVectNext->z, matSplat12));
+		outZ1 = AddSIMD(AddSIMD(MulSIMD(pVectNext->x, matSplat20), MulSIMD(pVectNext->y, matSplat21)), MulSIMD(pVectNext->z, matSplat22));
+	}
+	else
+	{
+		// even number of total vectors to process; 
+		// prime the zero group and jump into the middle of the loop
+		outX0 = AddSIMD(AddSIMD(MulSIMD(pVectors->x, matSplat00), MulSIMD(pVectors->y, matSplat01)), MulSIMD(pVectors->z, matSplat02));
+		outY0 = AddSIMD(AddSIMD(MulSIMD(pVectors->x, matSplat10), MulSIMD(pVectors->y, matSplat11)), MulSIMD(pVectors->z, matSplat12));
+		outZ0 = AddSIMD(AddSIMD(MulSIMD(pVectors->x, matSplat20), MulSIMD(pVectors->y, matSplat21)), MulSIMD(pVectors->z, matSplat22));
+		goto EVEN_CASE;
+	}
+
+	// Each trip through this loop handles two FourVectors. The store of bank one
+	// (outX1..outZ1) is deferred to the next trip, or to the flush after the loop.
+	while (pVectors < stop)
+	{
+		outX0 = MaddSIMD(pVectors->z, matSplat02, AddSIMD(MulSIMD(pVectors->x, matSplat00), MulSIMD(pVectors->y, matSplat01)));
+		outY0 = MaddSIMD(pVectors->z, matSplat12, AddSIMD(MulSIMD(pVectors->x, matSplat10), MulSIMD(pVectors->y, matSplat11)));
+		outZ0 = MaddSIMD(pVectors->z, matSplat22, AddSIMD(MulSIMD(pVectors->x, matSplat20), MulSIMD(pVectors->y, matSplat21)));
+
+		pOutNext->x = outX1;
+		pOutNext->y = outY1;
+		pOutNext->z = outZ1;
+
+	EVEN_CASE:
+		pVectNext = pVectors + 1;
+		pOutNext = pOut + 1;
+
+		outX1 = MaddSIMD(pVectNext->z, matSplat02, AddSIMD(MulSIMD(pVectNext->x, matSplat00), MulSIMD(pVectNext->y, matSplat01)));
+		outY1 = MaddSIMD(pVectNext->z, matSplat12, AddSIMD(MulSIMD(pVectNext->x, matSplat10), MulSIMD(pVectNext->y, matSplat11)));
+		outZ1 = MaddSIMD(pVectNext->z, matSplat22, AddSIMD(MulSIMD(pVectNext->x, matSplat20), MulSIMD(pVectNext->y, matSplat21)));
+
+		pOut->x = outX0;
+		pOut->y = outY0;
+		pOut->z = outZ0;
+
+		pVectors += 2;
+		pOut += 2;
+	}
+
+	// Flush the last round of output. This must go to the output array: writing it
+	// to pVectNext (as the in-place variant does) would clobber the input and leave
+	// the final output element unwritten.
+	pOutNext->x = outX1;
+	pOutNext->y = outY1;
+	pOutNext->z = outZ1;
+#else
+	// PC does not benefit from the unroll/scheduling above
+	fltx4 outX0, outY0, outZ0; // bank one of outputs
+
+	const FourVectors* stop = pVectors + numVectors;
+
+	// One FourVectors is rotated and stored per iteration.
+	while (pVectors < stop)
+	{
+		outX0 = MaddSIMD(pVectors->z, matSplat02, AddSIMD(MulSIMD(pVectors->x, matSplat00), MulSIMD(pVectors->y, matSplat01)));
+		outY0 = MaddSIMD(pVectors->z, matSplat12, AddSIMD(MulSIMD(pVectors->x, matSplat10), MulSIMD(pVectors->y, matSplat11)));
+		outZ0 = MaddSIMD(pVectors->z, matSplat22, AddSIMD(MulSIMD(pVectors->x, matSplat20), MulSIMD(pVectors->y, matSplat21)));
+
+		pOut->x = outX0;
+		pOut->y = outY0;
+		pOut->z = outZ0;
+		pVectors++;
+		pOut++;
+	}
+#endif
+}
+
 #ifdef _X360
 // Loop-scheduled code to process FourVectors in groups of eight quite efficiently.
 void FourVectors_TransformManyGroupsOfEightBy(FourVectors* RESTRICT pVectors, unsigned int numVectors, const matrix3x4_t& rotationMatrix, FourVectors* RESTRICT pOut)
@ -1162,4 +1471,9 @@ void TransformManyPointsBy(VectorAligned* RESTRICT pVectors, unsigned int numVec


 }
+
+#endif // #if !defined(__SPU__)
+
+
+
 #endif
--- a/r5dev/mathlib/ssemath.h
+++ b/r5dev/mathlib/ssemath.h
--- a/r5dev/mathlib/ssenoise.cpp
+++ b/r5dev/mathlib/ssenoise.cpp
@ -0,0 +1,232 @@
+//========= Copyright <20> 1996-2006, Valve Corporation, All rights reserved. ============//
+//
+// Purpose: Fast low quality noise suitable for real time use
+//
+//=====================================================================================//
+
+#include "core/stdafx.h"
+#include "tier0/dbg.h"
+#include "mathlib/mathlib.h"
+#include "mathlib/vector.h"
+#include "mathlib/ssemath.h"
+#include "mathlib/noisedata.h"
+
+// memdbgon must be the last include file in a .cpp file!!!
+//#include "tier0/memdbgon.h"
+
+
+// Added to a coordinate before masking so that the low bits of the resulting
+// float's bit pattern can be read back as fixed point.
+#define MAGIC_NUMBER (1<<15)								// gives 8 bits of fraction
+
+static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
+
+
+// Keeps the low 16 bits of each lane: the noise routines below read the low 8
+// as the fraction (xi & 0xff) and the next 8 as the lattice index (xi >> 8).
+static ALIGN16 int32 idx_mask[4] = { 0xffff, 0xffff, 0xffff, 0xffff };
+
+// NOTE: despite the name, this views idx_mask (0xffff per lane, not 0xff) as an fltx4.
+#define MASK255 (*((fltx4 *)(& idx_mask )))
+
+// Hashes an integer lattice coordinate through the perm_a, perm_b and perm_c
+// tables (each lookup wrapped to 0..255) and returns the matching entry of
+// impulse_xcoords. returns 0..1
+static inline float GetLatticePointValue(int idx_x, int idx_y, int idx_z)
+{
+	const int nHashX = perm_a[idx_x & 0xff];
+	const int nHashXY = perm_b[(idx_y + nHashX) & 0xff];
+	const int nHashXYZ = perm_c[(idx_z + nHashXY) & 0xff];
+
+	return impulse_xcoords[nHashXYZ];
+}
+
+// Evaluates lattice value noise at four points at once. x, y and z hold the
+// coordinates of the four sample points, one per lane. For every lane the eight
+// surrounding lattice values are fetched with scalar code and then blended by
+// trilinear interpolation with SIMD ops. The interpolated value is remapped by
+// 2 * (v - 0.5) before being returned.
+fltx4 NoiseSIMD(const fltx4& x, const fltx4& y, const fltx4& z)
+{
+	// use magic to convert to integer index: after adding the magic number, the
+	// masked low 16 bits of each lane hold the lattice index (upper 8) and the fraction (lower 8)
+	fltx4 x_idx = AndSIMD(MASK255, AddSIMD(x, Four_MagicNumbers));
+	fltx4 y_idx = AndSIMD(MASK255, AddSIMD(y, Four_MagicNumbers));
+	fltx4 z_idx = AndSIMD(MASK255, AddSIMD(z, Four_MagicNumbers));
+
+	fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
+	fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;
+
+	// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
+	//        Converting the indexed noise values back to vectors will cause more (128 bytes)
+	//        The noise table could store vectors if we chunked it into 2x2x2 blocks.
+	fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
+	// DOPASS(i) handles lane i: splits each index into fraction and lattice
+	// coordinate, then looks up the eight corner values of the enclosing cell.
+#define DOPASS(i)															\
+    {	unsigned int xi = SubInt( x_idx, i );								\
+		unsigned int yi = SubInt( y_idx, i );								\
+		unsigned int zi = SubInt( z_idx, i );								\
+		SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0);						\
+		SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0);						\
+		SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0);						\
+		xi>>=8;																\
+		yi>>=8;																\
+		zi>>=8;																\
+																			\
+		SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi );		\
+		SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 );		\
+		SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi );		\
+		SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 );	\
+		SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi );		\
+		SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 );	\
+		SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi );	\
+		SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 );	\
+    }
+
+	DOPASS(0);
+	DOPASS(1);
+	DOPASS(2);
+	DOPASS(3);
+
+	// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
+	// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops
+
+	// first, do x interpolation
+	fltx4 l2d00 = AddSIMD(lattice000, MulSIMD(xfrac, SubSIMD(lattice100, lattice000)));
+	fltx4 l2d01 = AddSIMD(lattice001, MulSIMD(xfrac, SubSIMD(lattice101, lattice001)));
+	fltx4 l2d10 = AddSIMD(lattice010, MulSIMD(xfrac, SubSIMD(lattice110, lattice010)));
+	fltx4 l2d11 = AddSIMD(lattice011, MulSIMD(xfrac, SubSIMD(lattice111, lattice011)));
+
+	// now, do y interpolation
+	fltx4 l1d0 = AddSIMD(l2d00, MulSIMD(yfrac, SubSIMD(l2d10, l2d00)));
+	fltx4 l1d1 = AddSIMD(l2d01, MulSIMD(yfrac, SubSIMD(l2d11, l2d01)));
+
+	// final z interpolation
+	fltx4 rslt = AddSIMD(l1d0, MulSIMD(zfrac, SubSIMD(l1d1, l1d0)));
+
+	// remap the interpolated 0..1 value to -1..1
+	return MulSIMD(Four_Twos, SubSIMD(rslt, Four_PointFives));
+
+
+}
+
+// Hashes an integer lattice coordinate through perm_a, perm_b and perm_c (each
+// lookup wrapped to 0..255), then copies the three consecutive floats stored for
+// that hash in s_randomGradients into lane idx of x, y and z.
+static inline void GetVectorLatticePointValue(int idx, fltx4& x, fltx4& y, fltx4& z,
+	int idx_x, int idx_y, int idx_z)
+{
+	const int nHashX = perm_a[idx_x & 0xff];
+	const int nHashXY = perm_b[(idx_y + nHashX) & 0xff];
+	const int nHashXYZ = perm_c[(idx_z + nHashXY) & 0xff];
+
+	// three floats per table entry
+	float const* pGradient = s_randomGradients + nHashXYZ * 3;
+	SubFloat(x, idx) = pGradient[0];
+	SubFloat(y, idx) = pGradient[1];
+	SubFloat(z, idx) = pGradient[2];
+}
+
+// Vector-valued counterpart of NoiseSIMD(x, y, z). For each of the four sample
+// points (one per lane) the s_randomGradients entries of the eight surrounding
+// lattice corners are fetched, and each of their three components is trilinearly
+// interpolated. Unlike the scalar version, the result is returned without any
+// final remapping.
+FourVectors DNoiseSIMD(const fltx4& x, const fltx4& y, const fltx4& z)
+{
+	// use magic to convert to integer index: after adding the magic number, the
+	// masked low 16 bits of each lane hold the lattice index (upper 8) and the fraction (lower 8)
+	fltx4 x_idx = AndSIMD(MASK255, AddSIMD(x, Four_MagicNumbers));
+	fltx4 y_idx = AndSIMD(MASK255, AddSIMD(y, Four_MagicNumbers));
+	fltx4 z_idx = AndSIMD(MASK255, AddSIMD(z, Four_MagicNumbers));
+
+	fltx4 xlattice000 = Four_Zeros, xlattice001 = Four_Zeros, xlattice010 = Four_Zeros, xlattice011 = Four_Zeros;
+	fltx4 xlattice100 = Four_Zeros, xlattice101 = Four_Zeros, xlattice110 = Four_Zeros, xlattice111 = Four_Zeros;
+	fltx4 ylattice000 = Four_Zeros, ylattice001 = Four_Zeros, ylattice010 = Four_Zeros, ylattice011 = Four_Zeros;
+	fltx4 ylattice100 = Four_Zeros, ylattice101 = Four_Zeros, ylattice110 = Four_Zeros, ylattice111 = Four_Zeros;
+	fltx4 zlattice000 = Four_Zeros, zlattice001 = Four_Zeros, zlattice010 = Four_Zeros, zlattice011 = Four_Zeros;
+	fltx4 zlattice100 = Four_Zeros, zlattice101 = Four_Zeros, zlattice110 = Four_Zeros, zlattice111 = Four_Zeros;
+
+	// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
+	//        Converting the indexed noise values back to vectors will cause more (128 bytes)
+	//        The noise table could store vectors if we chunked it into 2x2x2 blocks.
+	fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
+	// DODPASS(i) handles lane i: splits each index into fraction and lattice
+	// coordinate, then fetches the vector stored for each of the eight cell corners.
+#define DODPASS(i)															\
+    {	unsigned int xi = SubInt( x_idx, i );								\
+		unsigned int yi = SubInt( y_idx, i );								\
+		unsigned int zi = SubInt( z_idx, i );								\
+		SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0);						\
+		SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0);						\
+		SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0);						\
+		xi>>=8;																\
+		yi>>=8;																\
+		zi>>=8;																\
+																			\
+		GetVectorLatticePointValue( i, xlattice000, ylattice000, zlattice000, xi,yi,zi );		\
+		GetVectorLatticePointValue( i, xlattice001, ylattice001, zlattice001, xi,yi,zi+1 );		\
+		GetVectorLatticePointValue( i, xlattice010, ylattice010, zlattice010, xi,yi+1,zi );		\
+		GetVectorLatticePointValue( i, xlattice011, ylattice011, zlattice011, xi,yi+1,zi+1 );	\
+		GetVectorLatticePointValue( i, xlattice100, ylattice100, zlattice100, xi+1,yi,zi );		\
+		GetVectorLatticePointValue( i, xlattice101, ylattice101, zlattice101, xi+1,yi,zi+1 );	\
+		GetVectorLatticePointValue( i, xlattice110, ylattice110, zlattice110, xi+1,yi+1,zi );	\
+		GetVectorLatticePointValue( i, xlattice111, ylattice111, zlattice111, xi+1,yi+1,zi+1 );	\
+    }
+
+	DODPASS(0);
+	DODPASS(1);
+	DODPASS(2);
+	DODPASS(3);
+
+	// now, we have 8 lattice vectors for each of four points as m128s, and interpolant values for
+	// each axis in m128 form in [xyz]frac. Perform the trilinear interpolation as SIMD ops,
+	// once per output component.
+
+	// x component: first, do x interpolation
+	fltx4 xl2d00 = AddSIMD(xlattice000, MulSIMD(xfrac, SubSIMD(xlattice100, xlattice000)));
+	fltx4 xl2d01 = AddSIMD(xlattice001, MulSIMD(xfrac, SubSIMD(xlattice101, xlattice001)));
+	fltx4 xl2d10 = AddSIMD(xlattice010, MulSIMD(xfrac, SubSIMD(xlattice110, xlattice010)));
+	fltx4 xl2d11 = AddSIMD(xlattice011, MulSIMD(xfrac, SubSIMD(xlattice111, xlattice011)));
+
+	// now, do y interpolation
+	fltx4 xl1d0 = AddSIMD(xl2d00, MulSIMD(yfrac, SubSIMD(xl2d10, xl2d00)));
+	fltx4 xl1d1 = AddSIMD(xl2d01, MulSIMD(yfrac, SubSIMD(xl2d11, xl2d01)));
+
+	// final z interpolation
+	FourVectors rslt;
+	rslt.x = AddSIMD(xl1d0, MulSIMD(zfrac, SubSIMD(xl1d1, xl1d0)));
+
+	// y component: x interpolation
+	fltx4 yl2d00 = AddSIMD(ylattice000, MulSIMD(xfrac, SubSIMD(ylattice100, ylattice000)));
+	fltx4 yl2d01 = AddSIMD(ylattice001, MulSIMD(xfrac, SubSIMD(ylattice101, ylattice001)));
+	fltx4 yl2d10 = AddSIMD(ylattice010, MulSIMD(xfrac, SubSIMD(ylattice110, ylattice010)));
+	fltx4 yl2d11 = AddSIMD(ylattice011, MulSIMD(xfrac, SubSIMD(ylattice111, ylattice011)));
+
+	// now, do y interpolation
+	fltx4 yl1d0 = AddSIMD(yl2d00, MulSIMD(yfrac, SubSIMD(yl2d10, yl2d00)));
+	fltx4 yl1d1 = AddSIMD(yl2d01, MulSIMD(yfrac, SubSIMD(yl2d11, yl2d01)));
+
+	// final z interpolation
+	rslt.y = AddSIMD(yl1d0, MulSIMD(zfrac, SubSIMD(yl1d1, yl1d0)));
+
+	// z component: x interpolation
+	fltx4 zl2d00 = AddSIMD(zlattice000, MulSIMD(xfrac, SubSIMD(zlattice100, zlattice000)));
+	fltx4 zl2d01 = AddSIMD(zlattice001, MulSIMD(xfrac, SubSIMD(zlattice101, zlattice001)));
+	fltx4 zl2d10 = AddSIMD(zlattice010, MulSIMD(xfrac, SubSIMD(zlattice110, zlattice010)));
+	fltx4 zl2d11 = AddSIMD(zlattice011, MulSIMD(xfrac, SubSIMD(zlattice111, zlattice011)));
+
+	// now, do y interpolation
+	fltx4 zl1d0 = AddSIMD(zl2d00, MulSIMD(yfrac, SubSIMD(zl2d10, zl2d00)));
+	fltx4 zl1d1 = AddSIMD(zl2d01, MulSIMD(yfrac, SubSIMD(zl2d11, zl2d01)));
+
+	// final z interpolation
+	rslt.z = AddSIMD(zl1d0, MulSIMD(zfrac, SubSIMD(zl1d1, zl1d0)));
+
+	return rslt;
+
+
+}
+
+// Convenience overload: evaluates NoiseSIMD on the x, y and z members of pos.
+fltx4 NoiseSIMD(FourVectors const& pos)
+{
+	return NoiseSIMD(pos.x, pos.y, pos.z);
+}
+
+// Convenience overload: evaluates DNoiseSIMD on the x, y and z members of pos.
+FourVectors DNoiseSIMD(FourVectors const& pos)
+{
+	return DNoiseSIMD(pos.x, pos.y, pos.z);
+}
+
+// Builds a curl-style vector from three DNoiseSIMD samples: one at pos, one at
+// pos shifted by a fixed offset, and one shifted again by a second fixed offset
+// (the second offset is applied on top of the first). The three samples are
+// combined with the cross-difference pattern of a curl.
+FourVectors CurlNoiseSIMD(FourVectors const& pos)
+{
+	FourVectors fl4SamplePos = pos;
+
+	// first sample: the unshifted position
+	FourVectors fl4D1 = DNoiseSIMD(pos);
+
+	// second sample
+	fl4SamplePos.x = AddSIMD(fl4SamplePos.x, ReplicateX4(43.256));
+	fl4SamplePos.y = AddSIMD(fl4SamplePos.y, ReplicateX4(-67.89));
+	fl4SamplePos.z = AddSIMD(fl4SamplePos.z, ReplicateX4(1338.2));
+	FourVectors fl4D2 = DNoiseSIMD(fl4SamplePos);
+
+	// third sample
+	fl4SamplePos.x = AddSIMD(fl4SamplePos.x, ReplicateX4(-129.856));
+	fl4SamplePos.y = AddSIMD(fl4SamplePos.y, ReplicateX4(-967.23));
+	fl4SamplePos.z = AddSIMD(fl4SamplePos.z, ReplicateX4(2338.98));
+	FourVectors fl4D3 = DNoiseSIMD(fl4SamplePos);
+
+	// now we have the 3 derivatives of a vector valued field. return the curl of the field.
+	FourVectors fl4Curl;
+	fl4Curl.x = SubSIMD(fl4D3.y, fl4D2.z);
+	fl4Curl.y = SubSIMD(fl4D1.z, fl4D3.x);
+	fl4Curl.z = SubSIMD(fl4D2.x, fl4D1.y);
+	return fl4Curl;
+}
--- a/r5dev/mathlib/ssenoise.h
+++ b/r5dev/mathlib/ssenoise.h
@ -1,107 +0,0 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
-//
-// Purpose: Fast low quality noise suitable for real time use
-//
-//=====================================================================================//
-
-#include "core/stdafx.h"
-#include "tier0/dbg.h"
-#include "tier0/basetypes.h"
-#include "mathlib/mathlib.h"
-#include "mathlib/vector.h"
-#include "mathlib/ssemath.h"
-
-// memdbgon must be the last include file in a .cpp file!!!
-//#include "tier0/memdbgon.h"
-#include "noisedata.h"
-
-
-#define MAGIC_NUMBER (1<<15)								// gives 8 bits of fraction
-
-static fltx4 Four_MagicNumbers = { MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER, MAGIC_NUMBER };
-
-
-static ALIGN16 int32 idx_mask[4] = { 0xffff, 0xffff, 0xffff, 0xffff };
-
-#define MASK255 (*((fltx4 *)(& idx_mask )))
-
-// returns 0..1
-static inline float GetLatticePointValue(int idx_x, int idx_y, int idx_z)
-{
-	NOTE_UNUSED(perm_d);
-	NOTE_UNUSED(impulse_ycoords);
-	NOTE_UNUSED(impulse_zcoords);
-
-	int ret_idx = perm_a[idx_x & 0xff];
-	ret_idx = perm_b[(idx_y + ret_idx) & 0xff];
-	ret_idx = perm_c[(idx_z + ret_idx) & 0xff];
-	return impulse_xcoords[ret_idx];
-
-}
-
-fltx4 NoiseSIMD(const fltx4& x, const fltx4& y, const fltx4& z)
-{
-	// use magic to convert to integer index
-	fltx4 x_idx = AndSIMD(MASK255, AddSIMD(x, Four_MagicNumbers));
-	fltx4 y_idx = AndSIMD(MASK255, AddSIMD(y, Four_MagicNumbers));
-	fltx4 z_idx = AndSIMD(MASK255, AddSIMD(z, Four_MagicNumbers));
-
-	fltx4 lattice000 = Four_Zeros, lattice001 = Four_Zeros, lattice010 = Four_Zeros, lattice011 = Four_Zeros;
-	fltx4 lattice100 = Four_Zeros, lattice101 = Four_Zeros, lattice110 = Four_Zeros, lattice111 = Four_Zeros;
-
-	// FIXME: Converting the input vectors to int indices will cause load-hit-stores (48 bytes)
-	//        Converting the indexed noise values back to vectors will cause more (128 bytes)
-	//        The noise table could store vectors if we chunked it into 2x2x2 blocks.
-	fltx4 xfrac = Four_Zeros, yfrac = Four_Zeros, zfrac = Four_Zeros;
-#define DOPASS(i)															\
-    {	unsigned int xi = SubInt( x_idx, i );								\
-		unsigned int yi = SubInt( y_idx, i );								\
-		unsigned int zi = SubInt( z_idx, i );								\
-		SubFloat( xfrac, i ) = (xi & 0xff)*(1.0/256.0);						\
-		SubFloat( yfrac, i ) = (yi & 0xff)*(1.0/256.0);						\
-		SubFloat( zfrac, i ) = (zi & 0xff)*(1.0/256.0);						\
-		xi>>=8;																\
-		yi>>=8;																\
-		zi>>=8;																\
-																			\
-		SubFloat( lattice000, i ) = GetLatticePointValue( xi,yi,zi );		\
-		SubFloat( lattice001, i ) = GetLatticePointValue( xi,yi,zi+1 );		\
-		SubFloat( lattice010, i ) = GetLatticePointValue( xi,yi+1,zi );		\
-		SubFloat( lattice011, i ) = GetLatticePointValue( xi,yi+1,zi+1 );	\
-		SubFloat( lattice100, i ) = GetLatticePointValue( xi+1,yi,zi );		\
-		SubFloat( lattice101, i ) = GetLatticePointValue( xi+1,yi,zi+1 );	\
-		SubFloat( lattice110, i ) = GetLatticePointValue( xi+1,yi+1,zi );	\
-		SubFloat( lattice111, i ) = GetLatticePointValue( xi+1,yi+1,zi+1 );	\
-    }
-
-	DOPASS(0);
-	DOPASS(1);
-	DOPASS(2);
-	DOPASS(3);
-
-	// now, we have 8 lattice values for each of four points as m128s, and interpolant values for
-	// each axis in m128 form in [xyz]frac. Perfom the trilinear interpolation as SIMD ops
-
-	// first, do x interpolation
-	fltx4 l2d00 = AddSIMD(lattice000, MulSIMD(xfrac, SubSIMD(lattice100, lattice000)));
-	fltx4 l2d01 = AddSIMD(lattice001, MulSIMD(xfrac, SubSIMD(lattice101, lattice001)));
-	fltx4 l2d10 = AddSIMD(lattice010, MulSIMD(xfrac, SubSIMD(lattice110, lattice010)));
-	fltx4 l2d11 = AddSIMD(lattice011, MulSIMD(xfrac, SubSIMD(lattice111, lattice011)));
-
-	// now, do y interpolation
-	fltx4 l1d0 = AddSIMD(l2d00, MulSIMD(yfrac, SubSIMD(l2d10, l2d00)));
-	fltx4 l1d1 = AddSIMD(l2d01, MulSIMD(yfrac, SubSIMD(l2d11, l2d01)));
-
-	// final z interpolation
-	fltx4 rslt = AddSIMD(l1d0, MulSIMD(zfrac, SubSIMD(l1d1, l1d0)));
-
-	// map to 0..1
-	return MulSIMD(Four_Twos, SubSIMD(rslt, Four_PointFives));
-
-
-}
-
-fltx4 NoiseSIMD(FourVectors const& pos)
-{
-	return NoiseSIMD(pos.x, pos.y, pos.z);
-}
--- a/r5dev/mathlib/ssequaternion.h
+++ b/r5dev/mathlib/ssequaternion.h
--- a/r5dev/mathlib/transform.cpp
+++ b/r5dev/mathlib/transform.cpp
@ -0,0 +1,179 @@
+//==== Copyright (c) 1996-2011, Valve Corporation, All rights reserved. =====//
+//
+// Purpose: 
+//
+// $NoKeywords: $
+//
+//===========================================================================//
+
+#include "core/stdafx.h"
+#if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
+
+#include "mathlib/transform.h"
+#include "mathlib/mathlib.h"
+
+// memdbgon must be the last include file in a .cpp file!!!
+//#include "tier0/memdbgon.h"
+
+const CTransform g_TransformIdentity(Vector3D(0.0f, 0.0f, 0.0f), Quaternion(0.0f, 0.0f, 0.0f, 1.0f));
+
+// Resets out to the identity transform: quat_identity orientation at vec3_origin.
+void SetIdentityTransform(CTransform& out)
+{
+	out.m_orientation = quat_identity;
+	out.m_vPosition = vec3_origin;
+}
+
+// Composes in1 with in2 and stores the product in out. out may be the same
+// object as in1 or in2.
+void ConcatTransforms(const CTransform& in1, const CTransform& in2, CTransform& out)
+{
+	// Build the product in a local so aliasing between out and the inputs is harmless.
+	CTransform combined;
+
+	// orientation: in1 * in2
+	QuaternionMult(in1.m_orientation, in2.m_orientation, combined.m_orientation);
+
+	// position: in2's position run through in1's orientation, then offset by in1's position
+	QuaternionMultiply(in1.m_orientation, in2.m_vPosition, combined.m_vPosition);
+	combined.m_vPosition += in1.m_vPosition;
+
+	out = combined;
+}
+
+// CTransform overload of VectorIRotate: converts t to a matrix3x4_t and forwards
+// to the matrix version.
+void VectorIRotate(const Vector3D& v, const CTransform& t, Vector3D& out)
+{
+	// FIXME: Make work directly with the transform
+	matrix3x4_t matTransform;
+	TransformMatrix(t, matTransform);
+
+	VectorIRotate(v, matTransform, out);
+}
+
+// CTransform overload of VectorITransform: converts t to a matrix3x4_t and
+// forwards to the matrix version.
+void VectorITransform(const Vector3D& v, const CTransform& t, Vector3D& out)
+{
+	// FIXME: Make work directly with the transform
+	matrix3x4_t matTransform;
+	TransformMatrix(t, matTransform);
+
+	VectorITransform(v, matTransform, out);
+}
+
+// Interpolates between p and q at parameter t into qt: orientation via
+// QuaternionSlerp, position via VectorLerp.
+void TransformSlerp(const CTransform& p, const CTransform& q, float t, CTransform& qt)
+{
+	QuaternionSlerp(p.m_orientation, q.m_orientation, t, qt.m_orientation);
+	VectorLerp(p.m_vPosition, q.m_vPosition, t, qt.m_vPosition);
+}
+
+// Interpolates between p and q at parameter t into qt: orientation via
+// QuaternionBlend, position via VectorLerp.
+void TransformLerp(const CTransform& p, const CTransform& q, float t, CTransform& qt)
+{
+	QuaternionBlend(p.m_orientation, q.m_orientation, t, qt.m_orientation);
+	VectorLerp(p.m_vPosition, q.m_vPosition, t, qt.m_vPosition);
+}
+
+// Converts a CTransform to a matrix3x4_t by passing its orientation and
+// position to QuaternionMatrix.
+void TransformMatrix(const CTransform& in, matrix3x4_t& out)
+{
+	QuaternionMatrix(in.m_orientation, in.m_vPosition, out);
+}
+
+// CTransformUnaligned overload: converts to a matrix3x4_t by passing the
+// orientation and position to QuaternionMatrix.
+void TransformMatrix(const CTransformUnaligned& in, matrix3x4_t& out)
+{
+	QuaternionMatrix(in.m_orientation, in.m_vPosition, out);
+}
+
+// Scaled overload: forwards the orientation, position and vScaleIn to the
+// scale-taking QuaternionMatrix.
+void TransformMatrix(const CTransform& in, const Vector3D& vScaleIn, matrix3x4_t& out)
+{
+	QuaternionMatrix(in.m_orientation, in.m_vPosition, vScaleIn, out);
+}
+
+// Converts a matrix3x4_t to a CTransformUnaligned: orientation from
+// MatrixQuaternion, position from the ORIGIN column of the matrix.
+void MatrixTransform(const matrix3x4_t& in, CTransformUnaligned& out)
+{
+	MatrixQuaternion(in, out.m_orientation);
+	MatrixGetColumn(in, ORIGIN, out.m_vPosition);
+}
+
+// Converts a matrix3x4_t to a CTransform: orientation from MatrixQuaternion,
+// position from the ORIGIN column of the matrix.
+void MatrixTransform(const matrix3x4_t& in, CTransform& out)
+{
+	MatrixQuaternion(in, out.m_orientation);
+	MatrixGetColumn(in, ORIGIN, out.m_vPosition);
+}
+
+// Converts a matrix3x4_t to a CTransform plus a scale: the matrix is first run
+// through MatrixNormalize, whose return value becomes vScaleOut, and the
+// normalized matrix is then converted with the two-argument MatrixTransform.
+void MatrixTransform(const matrix3x4_t& in, CTransform& out, Vector3D& vScaleOut)
+{
+	matrix3x4_t matNormalized;
+	vScaleOut = MatrixNormalize(in, matNormalized);
+
+	MatrixTransform(matNormalized, out);
+}
+
+// Builds a CTransform from angles (converted with AngleQuaternion) and an origin.
+void AngleTransform(const QAngle& angles, const Vector3D& origin, CTransform& out)
+{
+	AngleQuaternion(angles, out.m_orientation);
+	out.m_vPosition = origin;
+}
+
+// Writes the inverse of in to out: the orientation is inverted, and the position
+// becomes the negation of in's position multiplied by that inverted orientation.
+// NOTE(review): unlike ConcatTransforms this writes straight into out without a
+// temporary; whether out may alias in depends on how QuaternionInvert and
+// QuaternionMultiply handle aliased arguments -- confirm before calling in place.
+void TransformInvert(const CTransform& in, CTransform& out)
+{
+	QuaternionInvert(in.m_orientation, out.m_orientation);
+	QuaternionMultiply(out.m_orientation, in.m_vPosition, out.m_vPosition);
+	out.m_vPosition *= -1.0f;
+}
+
+// Builds a transform located at vec3_origin whose orientation comes from
+// AxisAngleQuaternion(vecAxis, flAngleDegrees).
+void AxisAngleTransform(const Vector3D& vecAxis, float flAngleDegrees, CTransform& out)
+{
+	AxisAngleQuaternion(vecAxis, flAngleDegrees, out.m_orientation);
+	out.m_vPosition = vec3_origin;
+}
+
+// Forwards the transform's orientation to QuaternionVectorsFLU to fill
+// pForward, pLeft and pUp. The position is not used.
+void TransformVectorsFLU(const CTransform& in, Vector3D* pForward, Vector3D* pLeft, Vector3D* pUp)
+{
+	QuaternionVectorsFLU(in.m_orientation, pForward, pLeft, pUp);
+}
+
+// Forwards the transform's orientation to QuaternionVectorsForward to fill
+// pForward. The position is not used.
+void TransformVectorsForward(const CTransform& in, Vector3D* pForward)
+{
+	QuaternionVectorsForward(in.m_orientation, pForward);
+}
+
+// Returns true when the two transforms match within tolerance: positions are
+// compared with VectorsAreEqual (flPosTolerance) and, only if those match,
+// orientations with QuaternionsAreEqual (flRotTolerance).
+bool TransformsAreEqual(const CTransform& src1, const CTransform& src2, float flPosTolerance, float flRotTolerance)
+{
+	const bool bSamePosition = VectorsAreEqual(src1.m_vPosition, src2.m_vPosition, flPosTolerance);
+
+	// && short-circuits, so the orientation test is skipped when positions differ.
+	return bSamePosition && QuaternionsAreEqual(src1.m_orientation, src2.m_orientation, flRotTolerance);
+}
+
+// FIXME: optimize this with simd goodness
+// Converts pTransforms in place from parent-relative to accumulated space by
+// concatenating every non-root entry with its parent's transform.
+//   nRootTransformCount - the leading entries, which must have a negative parent index.
+//   nTransformCount     - total number of entries in pParentIndices / pTransforms.
+//   pParentIndices      - parent index per entry; a parent must come before its child.
+void TransformToWorldSpace(int nRootTransformCount, int nTransformCount, const int* pParentIndices, CTransform* pTransforms)
+{
+#ifdef _DEBUG
+	{
+		int i = 0;
+		while (i < nRootTransformCount)
+		{
+			Assert(pParentIndices[i] < 0);
+			++i;
+		}
+	}
+#endif
+
+	// Walk forward: a parent always has a lower index, so it has already been
+	// converted by the time its child is reached.
+	int i = nRootTransformCount;
+	while (i < nTransformCount)
+	{
+		int nParentBone = pParentIndices[i];
+		Assert(nParentBone >= 0 && nParentBone < i);
+
+		CTransform& child = pTransforms[i];
+		ConcatTransforms(pTransforms[nParentBone], child, child);
+		++i;
+	}
+}
+
+// FIXME: optimize this with simd goodness
+// Converts pTransforms in place back to parent-relative space: every non-root
+// entry is concatenated with the inverse of its parent's transform.
+//   nRootTransformCount - the leading entries, which must have a negative parent index.
+//   nTransformCount     - total number of entries in pParentIndices / pTransforms.
+//   pParentIndices      - parent index per entry; a parent must come before its child.
+// Scratch storage proportional to nTransformCount is taken from the stack.
+void TransformToParentSpace(int nRootTransformCount, int nTransformCount, const int* pParentIndices, CTransform* pTransforms)
+{
+#ifdef _DEBUG
+	for (int i = 0; i < nRootTransformCount; ++i)
+	{
+		Assert(pParentIndices[i] < 0);
+	}
+#endif
+
+	// Nothing to convert. Returning here also keeps zero or negative sizes away
+	// from stackalloc below.
+	if (nTransformCount <= 0 || nTransformCount <= nRootTransformCount)
+		return;
+
+	// Per-entry flag: has the inverse of this entry's transform been computed yet?
+	bool* pComputedParentTransform = (bool*)stackalloc(nTransformCount * sizeof(bool));
+	memset(pComputedParentTransform, 0, nTransformCount * sizeof(bool));
+	CTransform* pWorldToParentTransforms = (CTransform*)stackalloc(nTransformCount * sizeof(CTransform));
+
+	// Walk backwards: a parent always has a lower index than its child, so it has
+	// not been converted yet when the child needs its inverse.
+	for (int b = nTransformCount; --b >= nRootTransformCount; )
+	{
+		int nParentBone = pParentIndices[b];
+		// Same contract TransformToWorldSpace asserts; a violation would index the
+		// scratch arrays out of bounds.
+		Assert(nParentBone >= 0 && nParentBone < b);
+		if (!pComputedParentTransform[nParentBone])
+		{
+			// Invert each parent once and reuse it for all of its children.
+			TransformInvert(pTransforms[nParentBone], pWorldToParentTransforms[nParentBone]);
+			pComputedParentTransform[nParentBone] = true;
+		}
+		ConcatTransforms(pWorldToParentTransforms[nParentBone], pTransforms[b], pTransforms[b]);
+	}
+}
+
+#endif // !_STATIC_LINKED || _SHARED_LIB
+
--- a/r5dev/mathlib/transform.h
+++ b/r5dev/mathlib/transform.h
@ -0,0 +1,401 @@
+//====== Copyright 1996-2005, Valve Corporation, All rights reserved. =======//
+//
+// Purpose: 
+//
+// $NoKeywords: $
+//
+//===========================================================================//
+
+#ifndef TRANSFORM_H
+#define TRANSFORM_H
+
+#ifdef COMPILER_MSVC
+#pragma once
+#endif
+
+//#include "tier0/memalloc.h"
+#include "mathlib/vector.h"
+#include "mathlib/mathlib.h"
+
+//-----------------------------------------------------------------------------
+// Forward declarations
+//-----------------------------------------------------------------------------
+class CTransformUnaligned;
+
+
+//-----------------------------------------------------------------------------
+// Represents a position + orientation using quaternions
+//-----------------------------------------------------------------------------
+class ALIGN16 CTransform
+{
+public:
+	// --- Construction -------------------------------------------------------
+	// The default constructor performs no initialization of its own.
+	CTransform() {}
+	CTransform(const Vector3D& v, const Quaternion& q) : m_vPosition(v), m_orientation(q) {}
+	CTransform(const Vector3D& v, const QAngle& a) : m_vPosition(v)
+	{
+		// The orientation is produced from the angles by AngleQuaternion.
+		AngleQuaternion(a, m_orientation);
+	}
+
+	// --- Data ---------------------------------------------------------------
+	VectorAligned m_vPosition;
+	QuaternionAligned m_orientation;
+
+	// True only when both the position and the orientation report IsValid().
+	bool IsValid() const
+	{
+		return m_vPosition.IsValid() && m_orientation.IsValid();
+	}
+
+	// --- Comparison ---------------------------------------------------------
+	bool operator==(const CTransform& v) const;					///< exact equality check
+	bool operator!=(const CTransform& v) const;
+
+	// --- Initialization (for API compatibility with matrix3x4_t) --------------
+	inline void InitFromQAngles(const QAngle& angles, const Vector3D& vPosition = vec3_origin);
+	inline void InitFromMatrix(const matrix3x4_t& transform);
+	inline void InitFromQuaternion(const Quaternion& orientation, const Vector3D& vPosition = vec3_origin);
+
+	// --- Conversions --------------------------------------------------------
+	inline Quaternion ToQuaternion() const;
+	inline QAngle ToQAngle() const;
+	inline matrix3x4_t ToMatrix() const;
+
+	inline void SetToIdentity();
+
+	// --- Component access ---------------------------------------------------
+	inline void SetOrigin(Vector3D const& vPos)
+	{
+		m_vPosition = vPos;
+	}
+	inline void SetAngles(QAngle const& vAngles);
+	inline Vector3D GetOrigin(void) const
+	{
+		return m_vPosition;
+	}
+
+	// --- Applying the transform ---------------------------------------------
+	inline void GetBasisVectorsFLU(Vector3D* pForward, Vector3D* pLeft, Vector3D* pUp) const;
+	inline Vector3D GetForward() const;
+	inline Vector3D TransformVector(const Vector3D& v0) const;
+	inline Vector3D RotateVector(const Vector3D& v0) const;
+	inline Vector3D TransformVectorByInverse(const Vector3D& v0) const;
+	inline Vector3D RotateVectorByInverse(const Vector3D& v0) const;
+	inline Vector3D RotateExtents(const Vector3D& vBoxExtents) const; // these are extents and must remain positive/symmetric after rotation
+	inline void TransformAABB(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const;
+	inline void TransformAABBByInverse(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const;
+	inline void RotateAABB(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const;
+	inline void RotateAABBByInverse(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const;
+	//inline void TransformPlane( const cplane_t &inPlane, cplane_t &outPlane ) const;
+	//inline void InverseTransformPlane( const cplane_t &inPlane, cplane_t &outPlane ) const;
+
+	/// Computes an inverse.  Uses the 'TR' naming to be consistent with the same method in matrix3x4_t (which only works with orthonormal matrices) 
+	inline void InverseTR(CTransform& out) const;
+
+public:
+	// Copies position and orientation from the unaligned representation.
+	CTransform& operator=(const CTransformUnaligned& i);
+} ALIGN16_POST;
+
+
+extern const CTransform g_TransformIdentity;
+
+
+//-----------------------------------------------------------------------------
+// Represents an unaligned position + orientation using quaternions,
+// used only for copying data around
+//-----------------------------------------------------------------------------
+class CTransformUnaligned
+{
+public:
+	// --- Construction -------------------------------------------------------
+	// The default constructor performs no initialization of its own.
+	CTransformUnaligned() {}
+	CTransformUnaligned(const Vector3D& v, const Quaternion& q) : m_vPosition(v), m_orientation(q) {}
+	CTransformUnaligned(const CTransform& transform) : m_vPosition(transform.m_vPosition), m_orientation(transform.m_orientation) {}
+
+	// Builds the aligned CTransform holding the same position and orientation.
+	CTransform AsTransform() const
+	{
+		return CTransform(m_vPosition, m_orientation);
+	}
+
+	// --- Data ---------------------------------------------------------------
+	Vector3D m_vPosition;
+	Quaternion m_orientation;
+
+	// True only when both the position and the orientation report IsValid().
+	bool IsValid() const
+	{
+		return m_vPosition.IsValid() && m_orientation.IsValid();
+	}
+
+public:
+	// Copies position and orientation from the aligned representation.
+	CTransformUnaligned& operator=(const CTransform& i);
+};
+
+
+//-----------------------------------------------------------------------------
+// Inline methods
+//-----------------------------------------------------------------------------
+// Member-wise copy from the unaligned representation.
+inline CTransform& CTransform::operator=(const CTransformUnaligned& i)
+{
+	m_orientation = i.m_orientation;
+	m_vPosition = i.m_vPosition;
+	return *this;
+}
+
+// Member-wise copy from the aligned representation.
+inline CTransformUnaligned& CTransformUnaligned::operator=(const CTransform& i)
+{
+	m_orientation = i.m_orientation;
+	m_vPosition = i.m_vPosition;
+	return *this;
+}
+
+
+//-----------------------------------------------------------------------------
+// Other methods
+//-----------------------------------------------------------------------------
+void ConcatTransforms(const CTransform& in1, const CTransform& in2, CTransform& out);
+void TransformSlerp(const CTransform& p, const CTransform& q, float t, CTransform& qt);
+void TransformLerp(const CTransform& p, const CTransform& q, float t, CTransform& qt);
+void TransformMatrix(const CTransform& in, matrix3x4_t& out);
+void TransformMatrix(const CTransform& in, const Vector3D& vScaleIn, matrix3x4_t& out);
+
+// Uniform-scale convenience overload: replicates flScale onto all three axes
+// and forwards to QuaternionMatrix.
+inline void TransformMatrix(const CTransform& in, float flScale, matrix3x4_t& out)
+{
+	const Vector3D vUniformScale(flScale, flScale, flScale);
+	QuaternionMatrix(in.m_orientation, in.m_vPosition, vUniformScale, out);
+}
+
+// Normalizes the transform's orientation in place via QuaternionNormalize and
+// hands back that call's return value.  The position is left untouched.
+inline float TransformNormalize(CTransform& in)
+{
+	const float flResult = QuaternionNormalize(in.m_orientation);
+	return flResult;
+}
+
+void TransformMatrix(const CTransformUnaligned& in, matrix3x4_t& out);
+void MatrixTransform(const matrix3x4_t& in, CTransform& out);
+void MatrixTransform(const matrix3x4_t& in, CTransformUnaligned& out);
+void MatrixTransform(const matrix3x4_t& in, CTransform& out, Vector3D& vScaleOut);
+
+// Single-float scale overload: runs the per-axis decomposition and reports the
+// result of Vector3D::LargestComponentValue() on the extracted scale in flScale.
+inline void MatrixTransform(const matrix3x4_t& in, CTransform& out, float& flScale)
+{
+	Vector3D vAxisScale;
+	MatrixTransform(in, out, vAxisScale);
+	flScale = vAxisScale.LargestComponentValue();
+}
+
+void AngleTransform(const QAngle& angles, const Vector3D& origin, CTransform& out);
+void SetIdentityTransform(CTransform& out);
+void TransformVectorsFLU(const CTransform& in, Vector3D* pForward, Vector3D* pLeft, Vector3D* pUp);
+void TransformVectorsForward(const CTransform& in, Vector3D* pForward);
+
+// By-value convenience wrapper around SetIdentityTransform.
+inline const CTransform GetIdentityTransform()
+{
+	CTransform identity;
+	SetIdentityTransform(identity);
+	return identity;
+}
+
+// By-value convenience wrapper around MatrixTransform( in, out ).
+inline const CTransform MatrixTransform(const matrix3x4_t& in)
+{
+	CTransform result;
+	MatrixTransform(in, result);
+	return result;
+}
+
+// By-value convenience wrapper around TransformMatrix( in, out ) for aligned transforms.
+inline const matrix3x4_t TransformMatrix(const CTransform& in)
+{
+	matrix3x4_t result;
+	TransformMatrix(in, result);
+	return result;
+}
+// By-value convenience wrapper around TransformMatrix( in, out ) for unaligned transforms.
+inline const matrix3x4_t TransformMatrix(const CTransformUnaligned& in)
+{
+	matrix3x4_t result;
+	TransformMatrix(in, result);
+	return result;
+}
+
+// By-value convenience wrapper around ConcatTransforms( in1, in2, out ).
+inline const CTransform ConcatTransforms(const CTransform& in1, const CTransform& in2)
+{
+	CTransform combined;
+	ConcatTransforms(in1, in2, combined);
+	return combined;
+}
+
+
+void TransformInvert(const CTransform& in, CTransform& out);
+void AxisAngleTransform(const Vector3D& vecAxis, float flAngleDegrees, CTransform& out);
+void VectorIRotate(const Vector3D& v, const CTransform& t, Vector3D& out);
+void VectorITransform(const Vector3D& v, const CTransform& t, Vector3D& out);
+
+// Applies the transform to point p: the orientation quaternion is expanded into
+// its 3x3 matrix terms, applied to p, and the position is added as translation.
+inline Vector3D TransformPoint(const CTransformUnaligned& tm, const Vector3D& p)
+{
+	const Quaternion& q = tm.m_orientation;
+	const Vector3D& t = tm.m_vPosition;
+	return Vector3D(
+		t.x + (1.0f - 2.0f * q.y * q.y - 2.0f * q.z * q.z) * p.x + (2.0f * q.x * q.y - 2.0f * q.w * q.z) * p.y + (2.0f * q.x * q.z + 2.0f * q.w * q.y) * p.z,
+		t.y + (2.0f * q.x * q.y + 2.0f * q.w * q.z) * p.x + (1.0f - 2.0f * q.x * q.x - 2.0f * q.z * q.z) * p.y + (2.0f * q.y * q.z - 2.0f * q.w * q.x) * p.z,
+		t.z + (2.0f * q.x * q.z - 2.0f * q.w * q.y) * p.x + (2.0f * q.y * q.z + 2.0f * q.w * q.x) * p.y + (1.0f - 2.0f * q.x * q.x - 2.0f * q.y * q.y) * p.z
+	);
+}
+
+// TODO: implement in SIMD?
+// Applies the transform to point p: the orientation quaternion is expanded into
+// its 3x3 matrix terms, applied to p, and the position is added as translation.
+inline Vector3D TransformPoint(const CTransform& tm, const Vector3D& p)
+{
+	const QuaternionAligned& q = tm.m_orientation;
+	const VectorAligned& t = tm.m_vPosition;
+	return Vector3D(
+		t.x + (1.0f - 2.0f * q.y * q.y - 2.0f * q.z * q.z) * p.x + (2.0f * q.x * q.y - 2.0f * q.w * q.z) * p.y + (2.0f * q.x * q.z + 2.0f * q.w * q.y) * p.z,
+		t.y + (2.0f * q.x * q.y + 2.0f * q.w * q.z) * p.x + (1.0f - 2.0f * q.x * q.x - 2.0f * q.z * q.z) * p.y + (2.0f * q.y * q.z - 2.0f * q.w * q.x) * p.z,
+		t.z + (2.0f * q.x * q.z - 2.0f * q.w * q.y) * p.x + (2.0f * q.y * q.z + 2.0f * q.w * q.x) * p.y + (1.0f - 2.0f * q.x * q.x - 2.0f * q.y * q.y) * p.z
+	);
+}
+
+
+// Out-parameter flavour of TransformPoint, usable with any type exposing
+// m_vPosition and m_orientation (CTransform, CTransformUnaligned).
+// All inputs are copied to locals before 'out' is written, so the call is safe
+// when 'out' aliases 'p' or a member of 'tm' (e.g. transforming a point in place).
+template < class T >
+inline void TransformPoint(const T& tm, const Vector3D& p, Vector3D& out)
+{
+	const float qx = tm.m_orientation.x, qy = tm.m_orientation.y, qz = tm.m_orientation.z, qw = tm.m_orientation.w;
+	const float tx = tm.m_vPosition.x, ty = tm.m_vPosition.y, tz = tm.m_vPosition.z;
+	const float px = p.x, py = p.y, pz = p.z;
+
+	out.x = tx + (1.0f - 2.0f * qy * qy - 2.0f * qz * qz) * px + (2.0f * qx * qy - 2.0f * qw * qz) * py + (2.0f * qx * qz + 2.0f * qw * qy) * pz;
+	out.y = ty + (2.0f * qx * qy + 2.0f * qw * qz) * px + (1.0f - 2.0f * qx * qx - 2.0f * qz * qz) * py + (2.0f * qy * qz - 2.0f * qw * qx) * pz;
+	out.z = tz + (2.0f * qx * qz - 2.0f * qw * qy) * px + (2.0f * qy * qz + 2.0f * qw * qx) * py + (1.0f - 2.0f * qx * qx - 2.0f * qy * qy) * pz;
+}
+
+// Rotates p by the orientation of tm only; the position is not applied.
+// Usable with any type exposing m_orientation.
+// All inputs are copied to locals before 'out' is written, so the call is safe
+// when 'out' aliases 'p' or a member of 'tm' (e.g. rotating a vector in place).
+template < class T >
+inline void RotatePoint(const T& tm, const Vector3D& p, Vector3D& out)
+{
+	const float qx = tm.m_orientation.x, qy = tm.m_orientation.y, qz = tm.m_orientation.z, qw = tm.m_orientation.w;
+	const float px = p.x, py = p.y, pz = p.z;
+
+	out.x = (1.0f - 2.0f * qy * qy - 2.0f * qz * qz) * px + (2.0f * qx * qy - 2.0f * qw * qz) * py + (2.0f * qx * qz + 2.0f * qw * qy) * pz;
+	out.y = (2.0f * qx * qy + 2.0f * qw * qz) * px + (1.0f - 2.0f * qx * qx - 2.0f * qz * qz) * py + (2.0f * qy * qz - 2.0f * qw * qx) * pz;
+	out.z = (2.0f * qx * qz - 2.0f * qw * qy) * px + (2.0f * qy * qz + 2.0f * qw * qx) * py + (1.0f - 2.0f * qx * qx - 2.0f * qy * qy) * pz;
+}
+
+
+// By-value convenience wrapper around TransformInvert( in, out ).
+inline const CTransform TransformInvert(const CTransform& in)
+{
+	CTransform inverse;
+	TransformInvert(in, inverse);
+	return inverse;
+}
+
+// Transform equality test: positions are compared with flPosTolerance,
+// orientations with flRotTolerance.
+bool TransformsAreEqual(const CTransform& src1, const CTransform& src2, float flPosTolerance = 1e-2f, float flRotTolerance = 1e-1f);
+
+// Computes world-space transforms given local-space transforms + parent info
+// The start of the pTransforms array (the first nRootTransformCount transforms) must be filled with
+// the root transforms, which have no parent. The remaining entries, up to nTransformCount in total,
+// must be filled with local-space transforms which are relative to other transforms, including possibly the
+// root transforms. nTransformCount is therefore the size of the whole array, roots included, and
+// pParentIndices must hold nTransformCount entries as well.
+// Parent indices are relative to the entire array, so a parent index of 0 indicates the first element
+// of the array. Root entries must carry a negative parent index; for any non-root entry a negative
+// parent index is *illegal*.
+// Parent indices must always be sorted so that a parent appears earlier in the array than its children.
+// The transforms are modified in-place.
+void TransformToWorldSpace(int nRootTransformCount, int nTransformCount, const int* pParentIndices, CTransform* pTransforms);
+void TransformToParentSpace(int nRootTransformCount, int nTransformCount, const int* pParentIndices, CTransform* pTransforms);
+
+
+// Sets the position directly and derives the orientation from 'angles'
+// through AngleQuaternion.
+inline void CTransform::InitFromQAngles(const QAngle& angles, const Vector3D& vPosition)
+{
+	m_vPosition = vPosition;
+	AngleQuaternion(angles, m_orientation);
+}
+
+// Takes the position from the matrix origin and the orientation from
+// MatrixQuaternion( transform ).
+inline void CTransform::InitFromMatrix(const matrix3x4_t& transform)
+{
+	m_vPosition = transform.GetOrigin();
+	m_orientation = MatrixQuaternion(transform);
+}
+
+// Stores the given position and orientation verbatim.
+inline void CTransform::InitFromQuaternion(const Quaternion& orientation, const Vector3D& vPosition)
+{
+	m_vPosition = vPosition;
+	m_orientation = orientation;
+}
+
+// Replaces the orientation with the one AngleQuaternion produces from vAngles.
+// The position is left as is.
+inline void CTransform::SetAngles(QAngle const& vAngles)
+{
+	AngleQuaternion(
+		vAngles,
+		m_orientation);
+}
+
+// Returns a copy of the orientation.
+inline Quaternion CTransform::ToQuaternion() const
+{
+	return m_orientation;
+}
+// Returns the orientation converted by QuaternionAngles.
+inline QAngle CTransform::ToQAngle() const
+{
+	QAngle vResult;
+	QuaternionAngles(m_orientation, vResult);
+	return vResult;
+}
+
+// Builds the matrix3x4_t form of this transform via TransformMatrix.
+inline matrix3x4_t CTransform::ToMatrix() const
+{
+	matrix3x4_t mat;
+	TransformMatrix(*this, mat);
+	return mat;
+}
+
+// Resets to quat_identity orientation at vec3_origin.
+inline void CTransform::SetToIdentity()
+{
+	m_orientation = quat_identity;
+	m_vPosition = vec3_origin;
+}
+
+// Member-function spelling of TransformVectorsFLU( *this, ... ).
+inline void CTransform::GetBasisVectorsFLU(Vector3D* pForward, Vector3D* pLeft, Vector3D* pUp) const
+{
+	const CTransform& self = *this;
+	TransformVectorsFLU(self, pForward, pLeft, pUp);
+}
+
+// By-value wrapper around TransformVectorsForward( *this, ... ).
+inline Vector3D CTransform::GetForward() const
+{
+	Vector3D vResult;
+	TransformVectorsForward(*this, &vResult);
+	return vResult;
+}
+
+// Applies rotation and translation to v0 (see TransformPoint).
+inline Vector3D CTransform::TransformVector(const Vector3D& v0) const
+{
+	const CTransform& self = *this;
+	return TransformPoint(self, v0);
+}
+
+// Applies only the rotation to v0 (see RotatePoint).
+inline Vector3D CTransform::RotateVector(const Vector3D& v0) const
+{
+	Vector3D vRotated;
+	RotatePoint(*this, v0, vRotated);
+	return vRotated;
+}
+
+// By-value wrapper around VectorITransform( v0, *this, ... ).
+inline Vector3D CTransform::TransformVectorByInverse(const Vector3D& v0) const
+{
+	Vector3D vResult;
+	VectorITransform(v0, *this, vResult);
+	return vResult;
+}
+
+// By-value wrapper around VectorIRotate( v0, *this, ... ).
+inline Vector3D CTransform::RotateVectorByInverse(const Vector3D& v0) const
+{
+	Vector3D vResult;
+	VectorIRotate(v0, *this, vResult);
+	return vResult;
+}
+
+// Exact equality: both the positions and the orientations must compare equal.
+inline bool CTransform::operator==(const CTransform& t) const
+{
+	if (!(t.m_vPosition == m_vPosition))
+		return false;
+	return t.m_orientation == m_orientation;
+}
+
+// Exact inequality: true as soon as either the positions or the orientations differ.
+inline bool CTransform::operator!=(const CTransform& t) const
+{
+	if (t.m_vPosition != m_vPosition)
+		return true;
+	return t.m_orientation != m_orientation;
+}
+
+// PERFORMANCE: No native versions of these but implement them on matrix for convenient access
+// Converts to a matrix3x4_t first, then delegates to matrix3x4_t::TransformAABB.
+inline void CTransform::TransformAABB(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const
+{
+	matrix3x4_t mat = ToMatrix();
+	mat.TransformAABB(vecMinsIn, vecMaxsIn, vecMinsOut, vecMaxsOut);
+}
+
+// Converts to a matrix3x4_t first, then delegates to matrix3x4_t::TransformAABBByInverse.
+inline void CTransform::TransformAABBByInverse(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const
+{
+	matrix3x4_t mat = ToMatrix();
+	mat.TransformAABBByInverse(vecMinsIn, vecMaxsIn, vecMinsOut, vecMaxsOut);
+}
+
+// Converts to a matrix3x4_t first, then delegates to matrix3x4_t::RotateAABB.
+inline void CTransform::RotateAABB(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const
+{
+	matrix3x4_t mat = ToMatrix();
+	mat.RotateAABB(vecMinsIn, vecMaxsIn, vecMinsOut, vecMaxsOut);
+}
+// Converts to a matrix3x4_t first, then delegates to matrix3x4_t::RotateAABBByInverse.
+inline void CTransform::RotateAABBByInverse(const Vector3D& vecMinsIn, const Vector3D& vecMaxsIn, Vector3D& vecMinsOut, Vector3D& vecMaxsOut) const
+{
+	matrix3x4_t mat = ToMatrix();
+	mat.RotateAABBByInverse(vecMinsIn, vecMaxsIn, vecMinsOut, vecMaxsOut);
+}
+
+// Inverts by round-tripping through the matrix form:
+// transform -> matrix3x4_t -> InverseTR() -> back to CTransform.
+inline void CTransform::InverseTR(CTransform& out) const
+{
+	out = ToMatrix().InverseTR().ToCTransform();
+}
+
+
+// transform conversion operators on matrix3x4_t
+// Overwrites this matrix with the matrix form of 'transform'.
+inline void matrix3x4_t::InitFromCTransform(const CTransform& transform)
+{
+	matrix3x4_t& self = *this;
+	TransformMatrix(transform, self);
+}
+// Returns the CTransform that MatrixTransform extracts from this matrix.
+inline CTransform matrix3x4_t::ToCTransform() const
+{
+	CTransform xform;
+	MatrixTransform(*this, xform);
+	return xform;
+}
+
+
+#endif // TRANSFORM_H
--- a/r5dev/mathlib/vector.h
+++ b/r5dev/mathlib/vector.h
@ -8,7 +8,6 @@

 #ifndef VECTOR_H
 #define VECTOR_H
-#define NO_MALLOC_OVERRIDE

 #ifdef _WIN32
 #pragma once
@ -23,7 +22,7 @@
 #if defined( _PS3 )
 //#include <ssemath.h>
 #include <vectormath/c/vectormath_aos.h>
-#include "platform.h"
+#include "tier0/platform.h"
 #include "mathlib/math_pfns.h"
 #endif

@ -36,16 +35,19 @@
 #define ALIGN16_POST
 #endif

+#define NO_MALLOC_OVERRIDE
 #if !defined(NO_MALLOC_OVERRIDE)
 #include "tier0/memalloc.h"
 #endif // !NO_MALLOC_OVERRIDE
 #include "tier0/dbg.h"
 #include "tier0/platform.h"
+#if !defined( __SPU__ )
 #include "tier0/threadtools.h"
+#endif
 #include "mathlib/vector2d.h"
 #include "mathlib/math_pfns.h"
-#include "mathlib/bits.h"
 #include "vstdlib/random.h"
+
 // Uncomment this to add extra Asserts to check for NANs, uninitialized vecs, etc.
 //#define VECTOR_PARANOIA	1

@ -92,6 +94,7 @@ public:

   // Got any nasty NAN's?
 	bool IsValid() const;
+	bool IsReasonable(float range = 1000000) const;		///< Check for reasonably-sized values (if used as a game world position)
 	void Invalidate();

 	// array access...
@ -157,13 +160,15 @@ public:
 	inline bool IsZeroFast() const RESTRICT
 	{
 		static_assert(sizeof(vec_t) == sizeof(int));
-		return (*(const int*)(&x) == 0 &&
-			*(const int*)(&y) == 0 &&
-			*(const int*)(&z) == 0);
+		return (*reinterpret_cast<const int*>(&x) == 0 &&
+			*reinterpret_cast<const int*>(&y) == 0 &&
+			*reinterpret_cast<const int*>(&z) == 0);
 	}

-	vec_t	NormalizeInPlace();
-	Vector3D	Normalized() const;
+	vec_t	NormalizeInPlace();								///< Normalize all components
+	vec_t	NormalizeInPlaceSafe(const Vector3D& vFallback);///< Normalize in place; vFallback is presumably used when the length is 0 (cf. NormalizedSafe) -- TODO confirm
+	Vector3D	Normalized() const;								///< Return normalized vector
+	Vector3D	NormalizedSafe(const Vector3D& vFallback)const;		///< Return normalized vector, falling back to vFallback if the length of this is 0
 	bool	IsLengthGreaterThan(float val) const;
 	bool	IsLengthLessThan(float val) const;

@ -203,6 +208,9 @@ public:

 	// returns 0, 1, 2 corresponding to the component with the largest absolute value
 	inline int LargestComponent() const;
+	inline vec_t LargestComponentValue() const;
+	inline int SmallestComponent() const;
+	inline vec_t SmallestComponentValue() const;

 	// 2d
 	vec_t	Length2D(void) const;
@ -243,7 +251,8 @@ private:
 #endif
 };

-
+// Resets the vector to zero -- necessary for CNetworkVar and possibly other cases.
+inline void EnsureValidValue(Vector3D& x)
+{
+	x.Zero();
+}

 #define USE_M64S defined( PLATFORM_WINDOWS_PC )

@ -608,8 +617,14 @@ Vector3D RandomVector(vec_t minVal, vec_t maxVal);
 #endif

 float RandomVectorInUnitSphere(Vector3D* pVector);
+Vector3D RandomVectorInUnitSphere();
+Vector3D RandomVectorInUnitSphere(IUniformRandomStream* pRnd);
+
 float RandomVectorInUnitCircle(Vector2D* pVector);

+Vector3D RandomVectorOnUnitSphere();
+Vector3D RandomVectorOnUnitSphere(IUniformRandomStream* pRnd);
+

 //-----------------------------------------------------------------------------
 //
@ -666,6 +681,7 @@ inline void Vector3D::Init(vec_t ix, vec_t iy, vec_t iz)
 	CHECK_VALID(*this);
 }

+#if !defined(__SPU__)
 inline void Vector3D::Random(vec_t minVal, vec_t maxVal)
 {
 	x = RandomFloat(minVal, maxVal);
@ -673,6 +689,7 @@ inline void Vector3D::Random(vec_t minVal, vec_t maxVal)
 	z = RandomFloat(minVal, maxVal);
 	CHECK_VALID(*this);
 }
+#endif

 // This should really be a single opcode on the PowerPC (move r0 onto the vec reg)
 inline void Vector3D::Zero()
@ -749,6 +766,14 @@ inline bool Vector3D::IsValid() const
 	return IsFinite(x) && IsFinite(y) && IsFinite(z);
 }

+//-----------------------------------------------------------------------------
+// IsReasonable: true when the vector's length is strictly below 'range'
+//-----------------------------------------------------------------------------
+inline bool Vector3D::IsReasonable(float range) const
+{
+	const vec_t flLength = Length();
+	return flLength < range;
+}
+
 //-----------------------------------------------------------------------------
 // Invalidate
 //-----------------------------------------------------------------------------
@ -1290,9 +1315,10 @@ inline Vector3D VectorLerp(const Vector3D& src1, const Vector3D& src2, vec_t t)
 //-----------------------------------------------------------------------------
 // Temporary storage for vector results so const Vector& results can be returned
 //-----------------------------------------------------------------------------
-/*inline Vector& AllocTempVector()
+#if !defined(__SPU__)
+inline Vector3D& AllocTempVector()
 {
-	static Vector s_vecTemp[128];
+	static Vector3D s_vecTemp[128];
 	static CInterlockedInt s_nIndex;

 	int nIndex;
@ -1307,9 +1333,9 @@ inline Vector3D VectorLerp(const Vector3D& src1, const Vector3D& src2, vec_t t)
 		}
 		ThreadPause();
 	}
-	return s_vecTemp[nIndex & 0xffff];
-}*/
-
+	return s_vecTemp[nIndex];
+}
+#endif


 //-----------------------------------------------------------------------------
@ -1345,6 +1371,40 @@ inline int Vector3D::LargestComponent() const
 	return Z_INDEX;
 }

+// Returns the index (X_INDEX / Y_INDEX / Z_INDEX) of the component with the
+// smallest absolute value.  Comparisons are strict, so ties fall through to the
+// later axis.
+inline int Vector3D::SmallestComponent() const
+{
+	const float flMagX = fabs(x);
+	const float flMagY = fabs(y);
+	const float flMagZ = fabs(z);
+
+	if (flMagX < flMagY)
+	{
+		// y is out of the running; decide between x and z.
+		return (flMagX < flMagZ) ? X_INDEX : Z_INDEX;
+	}
+
+	// x is out of the running; decide between y and z.
+	return (flMagY < flMagZ) ? Y_INDEX : Z_INDEX;
+}
+
+
+// Returns the largest absolute value among x, y and z.
+inline float Vector3D::LargestComponentValue() const
+{
+	const float flMagX = fabs(x);
+	const float flMagY = fabs(y);
+	const float flMagZ = fabs(z);
+	const float flLargestXY = MAX(flMagX, flMagY);
+	return MAX(flLargestXY, flMagZ);
+}
+
+// Returns the smallest absolute value among x, y and z.
+inline float Vector3D::SmallestComponentValue() const
+{
+	const float flMagX = fabs(x);
+	const float flMagY = fabs(y);
+	const float flMagZ = fabs(z);
+	const float flSmallestXY = MIN(flMagX, flMagY);
+	return MIN(flSmallestXY, flMagZ);
+}
+
+
 inline void CrossProduct(const Vector3D& a, const Vector3D& b, Vector3D& result)
 {
 	CHECK_VALID(a);
@ -1390,9 +1450,9 @@ inline vec_t Vector3D::Length(void) const
 // Normalization
 //-----------------------------------------------------------------------------

-
+/*
 // FIXME: Can't use until we're un-macroed in mathlib.h
-inline vec_t VectorNormalize( Vector3D& v )
+inline vec_t VectorNormalize( Vector& v )
 {
 	Assert( v.IsValid() );
 	vec_t l = v.Length();
@ -1408,7 +1468,7 @@ inline vec_t VectorNormalize( Vector3D& v )
 	}
 	return l;
 }
-
+*/


 // check a point against a box
@ -1432,6 +1492,35 @@ inline vec_t Vector3D::DistTo(const Vector3D& vOther) const
 }


+//-----------------------------------------------------------------------------
+// Float equality with tolerance
+// The allowed difference is flTolerance times the sum of both magnitudes, plus
+// a small absolute floor so that values near zero still compare equal.
+//-----------------------------------------------------------------------------
+inline bool FloatsAreEqual(float f1, float f2, float flTolerance)
+{
+	// Sergiy: the implementation in Source2 is very inefficient, trying to start with a clean slate here, hopefully will reintegrate back to Source2
+	const float flAbsToleranceThreshold = 0.000003814697265625; // 2 ^ -18
+	const float flDifference = fabsf(f1 - f2);
+	const float flAllowed = flTolerance * (fabsf(f1) + fabsf(f2)) + flAbsToleranceThreshold;
+	return flDifference <= flAllowed;
+}
+
+
+//-----------------------------------------------------------------------------
+// Vector equality with percentage tolerance
+// Each component pair is tested independently with FloatsAreEqual, i.e. the
+// allowed difference scales with the sum of the two component magnitudes.
+// All three components must pass.
+//-----------------------------------------------------------------------------
+inline bool VectorsAreWithinPercentageTolerance(const Vector3D& src1, const Vector3D& src2, float flPercentageTolerance)
+{
+	// '&&' short-circuits: later components are only tested once the earlier ones pass.
+	return FloatsAreEqual(src1.x, src2.x, flPercentageTolerance)
+		&& FloatsAreEqual(src1.y, src2.y, flPercentageTolerance)
+		&& FloatsAreEqual(src1.z, src2.z, flPercentageTolerance);
+}
+
+
+
 //-----------------------------------------------------------------------------
 // Vector equality with tolerance
 //-----------------------------------------------------------------------------
@ -1475,6 +1564,11 @@ inline void VectorAbs(const Vector3D& src, Vector3D& dst)
 	dst.z = FloatMakePositive(src.z);
 }

+// By-value flavour: returns a vector holding the absolute value of each component.
+inline Vector3D VectorAbs(const Vector3D& src)
+{
+	const float flAbsX = fabsf(src.x);
+	const float flAbsY = fabsf(src.y);
+	const float flAbsZ = fabsf(src.z);
+	return Vector3D(flAbsX, flAbsY, flAbsZ);
+}
+

 //-----------------------------------------------------------------------------
 //
@ -1620,6 +1714,7 @@ inline float ComputeVolume(const Vector3D& vecMins, const Vector3D& vecMaxs)
 	return DotProduct(vecDelta, vecDelta);
 }

+#if !defined(__SPU__)
 // Get a random vector.
 inline Vector3D RandomVector(float minVal, float maxVal)
 {
@ -1627,6 +1722,7 @@ inline Vector3D RandomVector(float minVal, float maxVal)
 	random.Random(minVal, maxVal);
 	return random;
 }
+#endif

 #endif //slow

@ -1668,6 +1764,13 @@ inline bool operator!=(const Vector3D& v, float const* f)
 // you won't get an "u
 void VectorPerpendicularToVector(Vector3D const& in, Vector3D* pvecOut);

+// By-value convenience wrapper around VectorPerpendicularToVector( in, pvecOut ).
+inline const Vector3D VectorPerpendicularToVector(const Vector3D& in)
+{
+	Vector3D vPerpendicular;
+	VectorPerpendicularToVector(in, &vPerpendicular);
+	return vPerpendicular;
+}
+
 //-----------------------------------------------------------------------------
 // AngularImpulse
 //-----------------------------------------------------------------------------
@ -1676,12 +1779,14 @@ typedef Vector3D AngularImpulse;

 #ifndef VECTOR_NO_SLOW_OPERATIONS

+#if !defined(__SPU__)
 inline AngularImpulse RandomAngularImpulse(float minVal, float maxVal)
 {
 	AngularImpulse	angImp;
 	angImp.Random(minVal, maxVal);
 	return angImp;
 }
+#endif

 #endif

@ -1691,6 +1796,8 @@ inline AngularImpulse RandomAngularImpulse(float minVal, float maxVal)
 //-----------------------------------------------------------------------------

 class RadianEuler;
+class DegreeEuler;
+class QAngle;

 class Quaternion				// same data-layout as engine's vec4_t,
 {								//		which is a vec_t[4]
@ -1705,9 +1812,11 @@ public:
 #endif
 	}
 	inline Quaternion(vec_t ix, vec_t iy, vec_t iz, vec_t iw) : x(ix), y(iy), z(iz), w(iw) { }
-	inline Quaternion(RadianEuler const& angle);	// evil auto type promotion!!!
+	inline explicit Quaternion(RadianEuler const& angle);
+	inline explicit Quaternion(DegreeEuler const& angle);

 	inline void Init(vec_t ix = 0.0f, vec_t iy = 0.0f, vec_t iz = 0.0f, vec_t iw = 0.0f) { x = ix; y = iy; z = iz; w = iw; }
+	// Initializes from an imaginary (x, y, z) vector and a real (w) scalar.
+	inline void Init(const Vector3D& vImaginaryPart, float flRealPart)
+	{
+		Init(vImaginaryPart.x, vImaginaryPart.y, vImaginaryPart.z, flRealPart);
+	}

 	bool IsValid() const;
 	void Invalidate();
@ -1717,19 +1826,47 @@ public:

 	inline Quaternion Conjugate() const { return Quaternion(-x, -y, -z, w); }

+	// Forward / left / up vector accessors
+	const Vector3D GetForward()const;
+	const Vector3D GetLeft()const;
+	const Vector3D GetUp()const;
+
 	vec_t* Base() { return (vec_t*)this; }
 	const vec_t* Base() const { return (vec_t*)this; }

 	// convenience for debugging
 	inline void Print() const;

+	// Imaginary part: reinterprets the start of this object (x, y, z) as a Vector3D
+	Vector3D& ImaginaryPart() { return *reinterpret_cast<Vector3D*>(this); }
+	const Vector3D& ImaginaryPart() const { return *reinterpret_cast<const Vector3D*>(this); }
+	// Real part: the w component, by reference (mutable) or by value (const)
+	float& RealPart()
+	{
+		return w;
+	}
+	float RealPart() const
+	{
+		return w;
+	}
+	inline QAngle ToQAngle() const;
+	inline struct matrix3x4_t ToMatrix() const;
+
 	// array access...
 	vec_t operator[](int i) const;
 	vec_t& operator[](int i);

+	// Unary plus: returns an unmodified copy.
+	inline Quaternion operator+(void) const { return Quaternion(x, y, z, w); }
+	// Unary minus: negates all four components.
+	inline Quaternion operator-(void) const
+	{
+		return Quaternion(-x, -y, -z, -w);
+	}
+
 	vec_t x, y, z, w;
 };

+// Random Quaternion that is UNIFORMLY distributed over the S^3
+// should be good for random generation of orientation for unit tests and for game
+// NOTE: Nothing trivial like Quaternion(RandomAngle(0,180)) will do the trick , 
+//       one needs to take special care to generate a uniformly distributed quaternion.
+// NOTE(review): this prototype used to appear twice, verbatim; the second copy was
+//       possibly meant to be an overload taking a random stream -- confirm against the .cpp.
+const Quaternion RandomQuaternion();
+// Free-function spelling of Quaternion::Conjugate(): negates x, y, z and keeps w.
+inline const Quaternion Conjugate(const Quaternion& q)
+{
+	return q.Conjugate();
+}
+
+

 //-----------------------------------------------------------------------------
 // Array access
@ -1767,10 +1904,45 @@ inline bool Quaternion::operator!=(const Quaternion& src) const
 void Quaternion::Print() const
 {
 #ifndef _CERT
+#if !defined(__SPU__)
 	DevMsg(eDLL_T::ENGINE, "q{ %.3fi + %.3fj + %.3fk + %.3f }", x, y, z, w);
 #endif
+#endif
 }

+
+
+
+//-----------------------------------------------------------------------------
+// Binary operators
+//-----------------------------------------------------------------------------
+// Component-wise sum of two quaternions.
+inline Quaternion operator+(const Quaternion& q1, const Quaternion& q2)
+{
+	Quaternion sum;
+	sum.Init(q1.x + q2.x, q1.y + q2.y, q1.z + q2.z, q1.w + q2.w);
+	return sum;
+}
+
+// Component-wise difference of two quaternions.
+inline Quaternion operator-(const Quaternion& q1, const Quaternion& q2)
+{
+	Quaternion difference;
+	difference.Init(q1.x - q2.x, q1.y - q2.y, q1.z - q2.z, q1.w - q2.w);
+	return difference;
+}
+
+// Scalar * quaternion: scales every component by s.
+inline Quaternion operator*(float s, const Quaternion& q)
+{
+	Quaternion scaled;
+	scaled.Init(s * q.x, s * q.y, s * q.z, s * q.w);
+	return scaled;
+}
+
+// Quaternion * scalar: scales every component by s.
+inline Quaternion operator*(const Quaternion& q, float s)
+{
+	Quaternion scaled;
+	scaled.Init(q.x * s, q.y * s, q.z * s, q.w * s);
+	return scaled;
+}
+
+// Quaternion / scalar: divides every component by s.  s must be non-zero.
+inline Quaternion operator/(const Quaternion& q, float s)
+{
+	Assert(s != 0.0f);
+
+	Quaternion quotient;
+	quotient.Init(q.x / s, q.y / s, q.z / s, q.w / s);
+	return quotient;
+}
+
+
 //-----------------------------------------------------------------------------
 // Quaternion equality with tolerance
 //-----------------------------------------------------------------------------
@ -1898,17 +2070,35 @@ public:
 #endif
 } ALIGN16_POST;

+
+//-----------------------------------------------------------------------------
+// Src data hasn't changed, but work data is of a form more friendly for SPU
+// (PS3 uses the aligned vector/quaternion types; other platforms the plain ones)
+//-----------------------------------------------------------------------------
+#if defined( _PS3 )
+//typedef Vector		BoneVector;
+typedef VectorAligned		BoneVector;
+typedef QuaternionAligned	BoneQuaternion;
+#else
+typedef Vector3D				BoneVector;
+typedef Quaternion			BoneQuaternion;
+#endif
+// Identical on every platform
+typedef QuaternionAligned	BoneQuaternionAligned;
+
 //-----------------------------------------------------------------------------
 // Radian Euler angle aligned to axis (NOT ROLL/PITCH/YAW)
 //-----------------------------------------------------------------------------
 class QAngle;
+// Degree <-> radian conversion.  The whole replacement list is parenthesized so the
+// macros expand correctly inside larger expressions (e.g. 1.0f / VEC_DEG2RAD( a )).
+#define VEC_DEG2RAD( a ) ((a) * (3.14159265358979323846f / 180.0f))
+#define VEC_RAD2DEG( a ) ((a) * (180.0f / 3.14159265358979323846f))
 class RadianEuler
 {
 public:
 	inline RadianEuler(void) { }
 	inline RadianEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; }
-	inline RadianEuler(Quaternion const& q);	// evil auto type promotion!!!
-	inline RadianEuler(QAngle const& angles);	// evil auto type promotion!!!
+	inline explicit RadianEuler(Quaternion const& q);
+	inline explicit RadianEuler(QAngle const& angles);
+	inline explicit RadianEuler(DegreeEuler const& angles);

 	// Initialization
 	inline void Init(vec_t ix = 0.0f, vec_t iy = 0.0f, vec_t iz = 0.0f) { x = ix; y = iy; z = iz; }
@ -1941,6 +2131,18 @@ inline bool Quaternion::IsValid() const
 	return IsFinite(x) && IsFinite(y) && IsFinite(z) && IsFinite(w);
 }

+
+// Returns the 4D Euclidean length of the quaternion's components.
+FORCEINLINE float QuaternionLength(const Quaternion& q)
+{
+	const float flLengthSqr = q.x * q.x + q.y * q.y + q.z * q.z + q.w * q.w;
+	return sqrtf(flLengthSqr);
+}
+
+// True when the length of q differs from 1 by less than flTolerance.
+FORCEINLINE bool QuaternionIsNormalized(const Quaternion& q, float  flTolerance = 1e-6f)
+{
+	// The deviation is evaluated in double precision (1.0 is a double literal).
+	const double flDeviation = fabs(QuaternionLength(q) - 1.0);
+	return flDeviation < flTolerance;
+}
+
 inline void Quaternion::Invalidate()
 {
 	//#ifdef _DEBUG
@ -2003,6 +2205,116 @@ inline vec_t RadianEuler::operator[](int i) const
 }


+//-----------------------------------------------------------------------------
+// Degree Euler angle aligned to axis (NOT ROLL/PITCH/YAW)
+// Three per-axis angles stored in degrees; the degree counterpart of RadianEuler.
+//-----------------------------------------------------------------------------
+class DegreeEuler
+{
+public:
+	///\name Initialization
+	//@{
+	inline DegreeEuler(void) ///< Create with un-initialized components. If VECTOR_PARANOIA is set, will init with NANS.
+	{
+		// Initialize to NAN to catch errors
+#ifdef VECTOR_PARANOIA
+		x = y = z = VEC_T_NAN;
+#endif
+	}
+	inline DegreeEuler(vec_t X, vec_t Y, vec_t Z) { x = X; y = Y; z = Z; }
+	// Conversions from other rotation types are explicit, so nothing silently promotes to DegreeEuler.
+	inline explicit DegreeEuler(Quaternion const& q);
+	inline explicit DegreeEuler(QAngle const& angles);
+	inline explicit DegreeEuler(RadianEuler const& angles);
+
+	// Sets all three components; each argument defaults to 0.
+	inline void Init(vec_t ix = 0.0f, vec_t iy = 0.0f, vec_t iz = 0.0f) { x = ix; y = iy; z = iz; }
+	//@}
+
+	// conversion to qangle
+	inline QAngle ToQAngle() const;
+
+	// IsValid() is true when every component is finite; Invalidate() sets every component to NaN.
+	bool IsValid() const;
+	void Invalidate();
+
+	// Pointer to x; y and z follow it in declaration order.
+	inline vec_t* Base() { return &x; }
+	inline const vec_t* Base() const { return &x; }
+
+	// array access...
+	vec_t operator[](int i) const;
+	vec_t& operator[](int i);
+
+	// Angles, in degrees, about the x, y and z axes.
+	vec_t x, y, z;
+};
+
+
+//-----------------------------------------------------------------------------
+// DegreeEuler equality with tolerance
+// Each axis is compared independently against the same absolute tolerance.
+//-----------------------------------------------------------------------------
+inline bool DegreeEulersAreEqual(const DegreeEuler& src1, const DegreeEuler& src2, float tolerance = 0.0f)
+{
+	const bool bXWithin = !(FloatMakePositive(src1.x - src2.x) > tolerance);
+	const bool bYWithin = !(FloatMakePositive(src1.y - src2.y) > tolerance);
+	const bool bZWithin = (FloatMakePositive(src1.z - src2.z) <= tolerance);
+	return bXWithin && bYWithin && bZWithin;
+}
+
+/*
+extern void AngleQuaternion( DegreeEuler const &angles, Quaternion &qt );
+extern void QuaternionAngles( Quaternion const &q, DegreeEuler &angles );
+extern void QuaternionVectorsFLU( Quaternion const &q, Vector *pForward, Vector *pLeft, Vector *pUp );
+*/
+
+// Builds the quaternion from degree Euler angles by way of their radian form.
+inline Quaternion::Quaternion(DegreeEuler const& angles)
+{
+	// AngleQuaternion is fed a RadianEuler, so convert the degree angles first.
+	RadianEuler anglesRad(angles);
+	AngleQuaternion(anglesRad, *this);
+}
+
+// Per-axis radians -> degrees conversion; the axis order is unchanged.
+inline DegreeEuler::DegreeEuler(RadianEuler const& angles)
+{
+	x = VEC_RAD2DEG(angles.x);
+	y = VEC_RAD2DEG(angles.y);
+	z = VEC_RAD2DEG(angles.z);
+}
+
+// Per-axis degrees -> radians conversion; the axis order is unchanged.
+inline RadianEuler::RadianEuler(DegreeEuler const& angles)
+{
+	x = VEC_DEG2RAD(angles.x);
+	y = VEC_DEG2RAD(angles.y);
+	z = VEC_DEG2RAD(angles.z);
+}
+
+// Converts the quaternion to radian Euler angles, then scales each axis to degrees.
+inline DegreeEuler::DegreeEuler(Quaternion const& q)
+{
+	RadianEuler eulerRad(q);
+	x = VEC_RAD2DEG(eulerRad.x);
+	y = VEC_RAD2DEG(eulerRad.y);
+	z = VEC_RAD2DEG(eulerRad.z);
+}
+
+// True only when all three components pass IsFinite.
+inline bool DegreeEuler::IsValid() const
+{
+	if (!IsFinite(x))
+		return false;
+	if (!IsFinite(y))
+		return false;
+	return IsFinite(z);
+}
+
+// Sets every component to NaN. The VECTOR_PARANOIA guard is commented out,
+// so this happens in every build configuration.
+inline void DegreeEuler::Invalidate()
+{
+	//#ifdef VECTOR_PARANOIA
+	z = VEC_T_NAN;
+	y = z;
+	x = y;
+	//#endif
+}
+
+
+//-----------------------------------------------------------------------------
+// Array access
+//-----------------------------------------------------------------------------
+// Mutable component access: index 0, 1, 2 selects x, y, z.
+inline vec_t& DegreeEuler::operator[](int i)
+{
+	Assert((i >= 0) && (i < 3));
+	vec_t* pComponents = Base();
+	return pComponents[i];
+}
+
+// Read-only component access: index 0, 1, 2 selects x, y, z.
+inline vec_t DegreeEuler::operator[](int i) const
+{
+	Assert((i >= 0) && (i < 3));
+	const vec_t* pComponents = Base();
+	return pComponents[i];
+}
+
+
+
 //-----------------------------------------------------------------------------
 // Degree Euler QAngle pitch, yaw, roll
 //-----------------------------------------------------------------------------
@ -2061,6 +2373,12 @@ public:
 	// No assignment operators either...
 	QAngle& operator=(const QAngle& src);

+	void Normalize();
+	void NormalizePositive();
+
+	inline struct matrix3x4_t ToMatrix() const;
+	inline Quaternion ToQuaternion() const;
+
 #ifndef VECTOR_NO_SLOW_OPERATIONS
 	// copy constructors

@ -2080,6 +2398,9 @@ private:
 #endif
 };

+// Zero the object -- necessary for CNetworkVar and possibly other cases.
+inline void EnsureValidValue(QAngle& x)
+{
+	x.Init();
+}
+
 //-----------------------------------------------------------------------------
 // Allows us to specifically pass the vector by value when we need to
 //-----------------------------------------------------------------------------
@ -2141,6 +2462,26 @@ inline void QAngle::Init(vec_t ix, vec_t iy, vec_t iz)
 	CHECK_VALID(*this);
 }

+
+extern float AngleNormalize(float angle);
+extern float AngleNormalizePositive(float angle);
+
+// Passes each component through AngleNormalize independently.
+inline void QAngle::Normalize()
+{
+	const float flX = AngleNormalize(x);
+	const float flY = AngleNormalize(y);
+	const float flZ = AngleNormalize(z);
+	x = flX; y = flY; z = flZ;
+}
+
+// Passes each component through AngleNormalizePositive independently.
+inline void QAngle::NormalizePositive()
+{
+	const float flX = AngleNormalizePositive(x);
+	const float flY = AngleNormalizePositive(y);
+	const float flZ = AngleNormalizePositive(z);
+	x = flX; y = flY; z = flZ;
+}
+
+
+#if !defined(__SPU__)
 inline void QAngle::Random(vec_t minVal, vec_t maxVal)
 {
 	x = RandomFloat(minVal, maxVal);
@ -2148,9 +2489,11 @@ inline void QAngle::Random(vec_t minVal, vec_t maxVal)
 	z = RandomFloat(minVal, maxVal);
 	CHECK_VALID(*this);
 }
+#endif

 #ifndef VECTOR_NO_SLOW_OPERATIONS

+#if !defined(__SPU__)
 inline QAngle RandomAngle(float minVal, float maxVal)
 {
 	Vector3D random;
@ -2158,6 +2501,7 @@ inline QAngle RandomAngle(float minVal, float maxVal)
 	QAngle ret(random.x, random.y, random.z);
 	return ret;
 }
+#endif

 #endif

@ -2169,17 +2513,22 @@ inline RadianEuler::RadianEuler(QAngle const& angles)
 		angles.y * 3.14159265358979323846f / 180.f);
 }

-
-
+// Reorders QAngle components into axis order: (x, y, z) <- (angles.z, angles.x, angles.y).
+// No unit conversion is applied.
+inline DegreeEuler::DegreeEuler(QAngle const& angles)
+{
+	x = angles.z;
+	y = angles.x;
+	z = angles.y;
+}

 inline QAngle RadianEuler::ToQAngle(void) const
 {
-	return QAngle(
-		y * 180.f / 3.14159265358979323846f,
-		z * 180.f / 3.14159265358979323846f,
-		x * 180.f / 3.14159265358979323846f);
+	return QAngle(VEC_RAD2DEG(y), VEC_RAD2DEG(z), VEC_RAD2DEG(x));
 }

+// Inverse of the QAngle constructor's reordering: QAngle(x, y, z) <- (y, z, x).
+inline QAngle DegreeEuler::ToQAngle() const
+{
+	QAngle angResult(y, z, x);
+	return angResult;
+}
+
+
 //-----------------------------------------------------------------------------
 // assignment
 //-----------------------------------------------------------------------------
@ -2415,6 +2764,15 @@ inline void AngularImpulseToQAngle(const AngularImpulse& impulse, QAngle& angles
 	angles.z = impulse.x;
 }

+// Returns this quaternion converted to a QAngle via QuaternionAngles.
+inline QAngle Quaternion::ToQAngle() const
+{
+	// Declared locally; the definition lives outside this header.
+	extern void QuaternionAngles(const Quaternion & q, QAngle & angles);
+
+	QAngle angResult;
+	QuaternionAngles(*this, angResult);
+	return angResult;
+}
+
 #if !defined( _X360 ) && !defined( _PS3 )

 FORCEINLINE vec_t InvRSquared(const float* v)
@ -2430,7 +2788,11 @@ FORCEINLINE vec_t InvRSquared(const Vector3D& v)
 #else

 // call directly
+#if defined(__SPU__)
+FORCEINLINE float _VMX_InvRSquared(Vector& v)
+#else
 FORCEINLINE float _VMX_InvRSquared(const Vector& v)
+#endif
 {
 #if !defined (_PS3)
 	XMVECTOR xmV = XMVector3ReciprocalLength(XMLoadVector3(v.Base()));
@ -2616,6 +2978,16 @@ inline vec_t Vector3D::NormalizeInPlace()
 	return VectorNormalize(*this);
 }

+// Normalizes this vector in place. If VectorNormalize reports a length of
+// exactly zero, the vector is replaced by vFallback. Returns the reported length.
+inline vec_t Vector3D::NormalizeInPlaceSafe(const Vector3D& vFallback)
+{
+	const float flOldLength = VectorNormalize(*this);
+	if (flOldLength != 0.0f)
+		return flOldLength;
+
+	*this = vFallback;
+	return flOldLength;
+}
+
 inline Vector3D Vector3D::Normalized() const
 {
 	Vector3D norm = *this;
@ -2623,6 +2995,15 @@ inline Vector3D Vector3D::Normalized() const
 	return norm;
 }

+
+// Returns a normalized copy of this vector, or vFallback when VectorNormalize
+// reports a length of exactly zero. This vector is left unmodified.
+inline Vector3D Vector3D::NormalizedSafe(const Vector3D& vFallback)const
+{
+	Vector3D vUnit = *this;
+	if (VectorNormalize(vUnit) != 0.0f)
+		return vUnit;
+	return vFallback;
+}
+
+
 inline bool Vector3D::IsLengthGreaterThan(float val) const
 {
 	return LengthSqr() > val * val;
@ -2633,5 +3014,68 @@ inline bool Vector3D::IsLengthLessThan(float val) const
 	return LengthSqr() < val * val;
 }

+
+// Component-wise product of a and b.
+inline const Vector3D ScaleVector(const Vector3D& a, const Vector3D& b)
+{
+	const float flX = a.x * b.x, flY = a.y * b.y, flZ = a.z * b.z;
+	return Vector3D(flX, flY, flZ);
+}
+
+
+
+// Quaternion exponential of a 3D vector: returns ( sin(|v|)/|v| * v, cos(|v|) ).
+// For very small |v| the two factors are evaluated from their Taylor series so
+// that no division by a near-zero length takes place.
+inline const Quaternion Exp(const Vector3D& v)
+{
+	float theta = v.Length();
+	if (theta < 0.001f)
+	{
+		// limit case, cos(theta)       ~= 1 - theta^2/2 + theta^4/24
+		//             sin(theta)/theta ~= 1 - theta^2/6 + theta^4/120
+		float theta2_2 = theta * theta * 0.5f, theta4_24 = theta2_2 * theta2_2 * (1.0f / 6.0f);
+		// theta^4/120 == (theta^4/24) / 5, so the quartic term is scaled by 0.2 (0.05 would yield theta^4/480)
+		float k = 1.0f - theta2_2 * (1.0f / 3.0f) + theta4_24 * 0.2f;
+		return Quaternion(k * v.x, k * v.y, k * v.z, 1 - theta2_2 + theta4_24);
+	}
+	else
+	{
+		float k = sinf(theta) / theta;
+		return Quaternion(k * v.x, k * v.y, k * v.z, cosf(theta));
+	}
+}
+
+
+// Quaternion logarithm: returns the imaginary part of q rescaled by
+// theta / sin(theta), where sin(theta) is taken to be the length of the
+// imaginary part. NOTE(review): this presumes q is unit length -- confirm at call sites.
+inline const Vector3D QuaternionLog(const Quaternion& q)
+{
+	Vector3D axis = q.ImaginaryPart();
+	float sinTheta = axis.Length(), factor;
+	if (sinTheta > 0.001f)
+	{
+		// there's some substantial rotation; if w < 0, it's an over-180-degree rotation (in real space)
+		// MIN clamps sinTheta to asinf's valid domain
+		float theta = asinf(MIN(sinTheta, 1.0f));
+		factor = (q.w < 0.0f ? M_PI_F - theta : theta) / sinTheta;
+	}
+	else
+	{
+		// Small-angle branch: use the series instead of dividing by a tiny sinTheta.
+		// ArcSin[x]/x = 1 + x^2/6 + x^4 * 3/40 + o( x^5 )
+		float sinTheta2 = sinTheta * sinTheta;
+		float sinTheta4 = sinTheta2 * sinTheta2;
+		factor = (1 + sinTheta2 * (1.0f / 6.0f) + sinTheta4 * (3.0f / 40.0f));
+		if (q.w < 0)
+		{
+			factor = -factor; // because the axis of rotation is not defined, we'll just consider this rotation to be close enough to identity
+		}
+	}
+	return axis * factor;
+}
+
+
+
+// Rounds a to the nearest multiple of flSnap (halves round towards +infinity).
+inline float Snap(float a, float flSnap)
+{
+	const float flSteps = floorf(a / flSnap + 0.5f);
+	return flSteps * flSnap;
+}
+
+// Snaps each component of a to the nearest multiple of flSnap.
+inline  const Vector3D Snap(const Vector3D& a, float flSnap)
+{
+	const float flX = Snap(a.x, flSnap);
+	const float flY = Snap(a.y, flSnap);
+	const float flZ = Snap(a.z, flSnap);
+	return Vector3D(flX, flY, flZ);
+}
+
 #endif

--- a/r5dev/mathlib/vector2d.h
+++ b/r5dev/mathlib/vector2d.h
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright <20> 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -19,13 +19,27 @@
 // For vec_t, put this somewhere else?
 #include "tier0/basetypes.h"

-// For rand(). We really need a library!
-#include <stdlib.h>
+// For RandomFloat()
+#include "vstdlib/random.h"

 #include "tier0/dbg.h"
 #include "mathlib/bits.h"
 #include "mathlib/math_pfns.h"

+#ifndef M_PI
+#define M_PI		3.14159265358979323846	// matches value in gcc v2 math.h
+#endif
+
+#ifndef M_PI_F
+#define M_PI_F		((float)(M_PI))
+#endif
+
+#ifndef DEG2RAD
+#define DEG2RAD( x  )  ( (float)(x) * (float)(M_PI_F / 180.f) )
+#endif
+
+extern void inline SinCos(float radians, float* RESTRICT sine, float* RESTRICT cosine);
+
 //=========================================================
 // 2D Vector2D
 //=========================================================
@ -37,9 +51,9 @@ public:
 	vec_t x, y;

 	// Construction/destruction
-	Vector2D(void);
+	Vector2D();
 	Vector2D(vec_t X, vec_t Y);
-	Vector2D(const float* pFloat);
+	explicit Vector2D(const float* pFloat);

 	// Initialization
 	void Init(vec_t ix = 0.0f, vec_t iy = 0.0f);
@ -196,7 +210,7 @@ void Vector2DLerp(const Vector2D& src1, const Vector2D& src2, vec_t t, Vector2D&
 // constructors
 //-----------------------------------------------------------------------------

-inline Vector2D::Vector2D(void)
+inline Vector2D::Vector2D()
 {
 #ifdef _DEBUG
 	// Initialize to NAN to catch errors
@ -238,11 +252,13 @@ inline void Vector2D::Init(vec_t ix, vec_t iy)
 	Assert(IsValid());
 }

+#if !defined(__SPU__)
 inline void Vector2D::Random(float minVal, float maxVal)
 {
-	x = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-	y = minVal + ((float)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+	x = RandomFloat(minVal, maxVal);
+	y = RandomFloat(minVal, maxVal);
 }
+#endif

 inline void Vector2DClear(Vector2D& a)
 {
@ -439,6 +455,15 @@ inline void Vector2DDivide(const Vector2D& a, const Vector2D& b, Vector2D& c)
 	c.y = a.y / b.y;
 }

+// Rotates vIn by flDegrees and stores the result in vOut.
+// vIn and vOut may refer to the same vector.
+inline void Vector2DRotate(const Vector2D& vIn, float flDegrees, Vector2D& vOut)
+{
+	float c, s;
+	SinCos(DEG2RAD(flDegrees), &s, &c);
+
+	// Compute both components before writing either, so that vOut == vIn works:
+	// writing vOut.x first would corrupt the vIn.x needed for vOut.y.
+	const float flX = vIn.x * c - vIn.y * s;
+	const float flY = vIn.x * s + vIn.y * c;
+	vOut.x = flX;
+	vOut.y = flY;
+}
+
 inline void Vector2DMA(const Vector2D& start, float s, const Vector2D& dir, Vector2D& result)
 {
 	Assert(start.IsValid() && IsFinite(s) && dir.IsValid());
--- a/r5dev/mathlib/vector4d.h
+++ b/r5dev/mathlib/vector4d.h
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -14,19 +14,19 @@
 #endif

 #include <math.h>
-#include <stdlib.h>		// For rand(). We really need a library!
 #include <float.h>
-#if !defined( _X360 )
-#include <xmmintrin.h>	// For SSE
+#if !defined( PLATFORM_PPC ) && !defined( _PS3 )
+#include <xmmintrin.h>	// for sse
 #endif
 #include "tier0/basetypes.h"	// For vec_t, put this somewhere else?
 #include "tier0/dbg.h"
 #include "mathlib/bits.h"
 #include "mathlib/math_pfns.h"
-
+#include "mathlib/vector.h"
+#include "vstdlib/random.h"
 // forward declarations
-class Vector3D;
 class Vector2D;
+class Vector3D;

 //=========================================================
 // 4D Vector4D
@ -39,12 +39,13 @@ public:
 	vec_t x, y, z, w;

 	// Construction/destruction
-	Vector4D(void);
+	Vector4D();
 	Vector4D(vec_t X, vec_t Y, vec_t Z, vec_t W);
-	Vector4D(const float* pFloat);
+	explicit Vector4D(const float* pFloat);

 	// Initialization
 	void Init(vec_t ix = 0.0f, vec_t iy = 0.0f, vec_t iz = 0.0f, vec_t iw = 0.0f);
+	void Init(const Vector3D& src, vec_t iw = 0.0f);

 	// Got any nasty NAN's?
 	bool IsValid() const;
@ -79,6 +80,13 @@ public:
 	Vector4D& operator/=(const Vector4D& v);
 	Vector4D& operator/=(float s);

+	Vector4D	operator-(void) const;
+	Vector4D	operator*(float fl) const;
+	Vector4D	operator/(float fl) const;
+	Vector4D	operator*(const Vector4D& v) const;
+	Vector4D	operator+(const Vector4D& v) const;
+	Vector4D	operator-(const Vector4D& v) const;
+
 	// negate the Vector4D components
 	void	Negate();

@ -202,7 +210,7 @@ void Vector4DLerp(Vector4D const& src1, Vector4D const& src2, vec_t t, Vector4D&
 // constructors
 //-----------------------------------------------------------------------------

-inline Vector4D::Vector4D(void)
+inline Vector4D::Vector4D()
 {
 #ifdef _DEBUG
 	// Initialize to NAN to catch errors
@ -237,20 +245,27 @@ inline Vector4D::Vector4D(const Vector4D& vOther)
 //-----------------------------------------------------------------------------
 // initialization
 //-----------------------------------------------------------------------------
-
 inline void Vector4D::Init(vec_t ix, vec_t iy, vec_t iz, vec_t iw)
 {
 	x = ix; y = iy; z = iz;	w = iw;
 	Assert(IsValid());
 }

+// Initializes xyz from a 3D vector and w from iw, then asserts the result is valid.
+inline void Vector4D::Init(const Vector3D& src, vec_t iw)
+{
+	x = src.x;
+	y = src.y;
+	z = src.z;
+	w = iw;
+	Assert(IsValid());
+}
+
+#if !defined(__SPU__)
 inline void Vector4D::Random(vec_t minVal, vec_t maxVal)
 {
-	x = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-	y = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-	z = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
-	w = minVal + ((vec_t)rand() / VALVE_RAND_MAX) * (maxVal - minVal);
+	x = RandomFloat(minVal, maxVal);
+	y = RandomFloat(minVal, maxVal);
+	z = RandomFloat(minVal, maxVal);
+	w = RandomFloat(minVal, maxVal);
 }
+#endif

 inline void Vector4DClear(Vector4D& a)
 {
@ -412,6 +427,52 @@ inline Vector4D& Vector4D::operator*=(Vector4D const& v)
 	return *this;
 }

+// Unary minus: returns a copy with every component negated.
+inline Vector4D Vector4D::operator-(void) const
+{
+	Vector4D vNegated(-x, -y, -z, -w);
+	return vNegated;
+}
+
+// Returns *this + v, computed by Vector4DAdd.
+inline Vector4D Vector4D::operator+(const Vector4D& v) const
+{
+	Vector4D vSum;
+	Vector4DAdd(*this, v, vSum);
+	return vSum;
+}
+
+// Returns *this - v, computed by Vector4DSubtract.
+inline Vector4D Vector4D::operator-(const Vector4D& v) const
+{
+	Vector4D vDifference;
+	Vector4DSubtract(*this, v, vDifference);
+	return vDifference;
+}
+
+
+// Returns *this scaled by fl, computed by Vector4DMultiply.
+inline Vector4D Vector4D::operator*(float fl) const
+{
+	Vector4D vScaled;
+	Vector4DMultiply(*this, fl, vScaled);
+	return vScaled;
+}
+
+// Returns the product of *this and v, computed by the vector overload of Vector4DMultiply.
+inline Vector4D Vector4D::operator*(const Vector4D& v) const
+{
+	Vector4D vProduct;
+	Vector4DMultiply(*this, v, vProduct);
+	return vProduct;
+}
+
+// Returns *this divided by fl, computed by Vector4DDivide.
+inline Vector4D Vector4D::operator/(float fl) const
+{
+	Vector4D vQuotient;
+	Vector4DDivide(*this, fl, vQuotient);
+	return vQuotient;
+}
+
+// Scalar-on-the-left multiply; forwards to Vector4D::operator*(float).
+inline Vector4D operator*(float fl, const Vector4D& v)
+{
+	Vector4D vScaled = v * fl;
+	return vScaled;
+}
+
 inline Vector4D& Vector4D::operator/=(float fl)
 {
 	Assert(fl != 0.0f);
@ -615,8 +676,10 @@ inline void Vector4DAligned::Set(vec_t X, vec_t Y, vec_t Z, vec_t W)

 inline void Vector4DAligned::InitZero(void)
 {
-#if !defined( _X360 )
+#if !defined( PLATFORM_PPC )
 	this->AsM128() = _mm_set1_ps(0.0f);
+#elif defined(_PS3)
+	this->AsM128() = VMX_ZERO;
 #else
 	this->AsM128() = __vspltisw(0);
 #endif
@ -626,11 +689,13 @@ inline void Vector4DAligned::InitZero(void)
 inline void Vector4DMultiplyAligned(Vector4DAligned const& a, Vector4DAligned const& b, Vector4DAligned& c)
 {
 	Assert(a.IsValid() && b.IsValid());
-#if !defined( _X360 )
+#if !defined( PLATFORM_PPC )
 	c.x = a.x * b.x;
 	c.y = a.y * b.y;
 	c.z = a.z * b.z;
 	c.w = a.w * b.w;
+#elif defined(_PS3)
+	c.AsM128() = __vec_mul(a.AsM128(), b.AsM128());
 #else
 	c.AsM128() = __vmulfp(a.AsM128(), b.AsM128());
 #endif
@ -640,7 +705,7 @@ inline void Vector4DWeightMAD(vec_t w, Vector4DAligned const& vInA, Vector4DAlig
 {
 	Assert(vInA.IsValid() && vInB.IsValid() && IsFinite(w));

-#if !defined( _X360 )
+#if !defined( PLATFORM_PPC )
 	vOutA.x += vInA.x * w;
 	vOutA.y += vInA.y * w;
 	vOutA.z += vInA.z * w;
@ -650,6 +715,16 @@ inline void Vector4DWeightMAD(vec_t w, Vector4DAligned const& vInA, Vector4DAlig
 	vOutB.y += vInB.y * w;
 	vOutB.z += vInB.z * w;
 	vOutB.w += vInB.w * w;
+#elif defined(_PS3)
+#if ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ == 1 ) && ( __GNUC_PATCHLEVEL__ == 1 )
+	// GCC 4.1.1
+	__m128 temp = vec_splats(w);
+#else //__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 1
+	__m128 temp = __m128(w);
+#endif //__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 1
+
+	vOutA.AsM128() = vec_madd(vInA.AsM128(), temp, vOutA.AsM128());
+	vOutB.AsM128() = vec_madd(vInB.AsM128(), temp, vOutB.AsM128());
 #else
 	__vector4 temp;

@ -665,13 +740,23 @@ inline void Vector4DWeightMADSSE(vec_t w, Vector4DAligned const& vInA, Vector4DA
 {
 	Assert(vInA.IsValid() && vInB.IsValid() && IsFinite(w));

-#if !defined( _X360 )
+#if !defined( PLATFORM_PPC )
 	// Replicate scalar float out to 4 components
 	__m128 packed = _mm_set1_ps(w);

 	// 4D SSE Vector MAD
 	vOutA.AsM128() = _mm_add_ps(vOutA.AsM128(), _mm_mul_ps(vInA.AsM128(), packed));
 	vOutB.AsM128() = _mm_add_ps(vOutB.AsM128(), _mm_mul_ps(vInB.AsM128(), packed));
+#elif defined(_PS3)
+#if ( __GNUC__ == 4 ) && ( __GNUC_MINOR__ == 1 ) && ( __GNUC_PATCHLEVEL__ == 1 )
+	// GCC 4.1.1
+	__m128 temp = vec_splats(w);
+#else //__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 1
+	__m128 temp = __m128(w);
+#endif //__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ == 1
+
+	vOutA.AsM128() = vec_madd(vInA.AsM128(), temp, vOutA.AsM128());
+	vOutB.AsM128() = vec_madd(vInB.AsM128(), temp, vOutB.AsM128());
 #else
 	__vector4 temp;

--- a/r5dev/mathlib/vmatrix.cpp
+++ b/r5dev/mathlib/vmatrix.cpp
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -6,18 +6,19 @@
 //
 //=============================================================================//
 #include "core/stdafx.h"
+#include "tier0/dbg.h"

 #if !defined(_STATIC_LINKED) || defined(_SHARED_LIB)
-#include "tier0/dbg.h"
-#include "tier0/basetypes.h"
+
 #include "mathlib/vmatrix.h"
 #include "mathlib/mathlib.h"
 #include "mathlib/vector4d.h"
+#include "mathlib/ssemath.h"

 // memdbgon must be the last include file in a .cpp file!!!
 //#include "tier0/memdbgon.h"

-//#pragma warning (disable : 4700) // local variable 'x' used without having been initialized
+#pragma warning (disable : 4700) // local variable 'x' used without having been initialized

 // ------------------------------------------------------------------------------------------- //
 // Helper functions.
@ -120,7 +121,7 @@ VMatrix SetupMatrixProjection(const Vector3D& vOrigin, const VPlane& thePlane)

 VMatrix SetupMatrixAxisRot(const Vector3D& vAxis, vec_t fDegrees)
 {
-	vec_t s, c, t;
+	vec_t s, c, t; // sin, cos, 1-cos
 	vec_t tx, ty, tz;
 	vec_t sx, sy, sz;
 	vec_t fRadians;
@ -142,6 +143,43 @@ VMatrix SetupMatrixAxisRot(const Vector3D& vAxis, vec_t fDegrees)
 		0.0f, 0.0f, 0.0f, 1.0f);
 }

+
+// Basically takes a cross product and then does the same thing as SetupMatrixAxisRot
+// above, but takes advantage of the fact that the sin angle is precomputed.
+// Both axes must be unit length. When their cross product has zero length
+// (parallel or exactly opposite axes) the identity matrix is returned.
+// NOTE(review): for exactly opposite axes identity does not map vFromAxis onto
+// vToAxis -- callers that can hit this case need to handle it themselves.
+VMatrix	SetupMatrixAxisToAxisRot(const Vector3D& vFromAxis, const Vector3D& vToAxis)
+{
+	// Compare with a tolerance: LengthSqr() of a normalized float vector is
+	// rarely exactly 1, so an exact equality test fires spuriously.
+	Assert(FloatMakePositive(vFromAxis.LengthSqr() - 1.0f) < 1e-3f); // these axes
+	Assert(FloatMakePositive(vToAxis.LengthSqr() - 1.0f) < 1e-3f); // must be normal.
+
+	vec_t s, c, t; // sin(theta), cos(theta), 1-cos
+	vec_t tx, ty, tz;
+	vec_t sx, sy, sz;
+
+	Vector3D vAxis = vFromAxis.Cross(vToAxis);
+
+	// For unit inputs |from x to| = sin(theta) and from . to = cos(theta).
+	s = vAxis.Length();
+	c = vFromAxis.Dot(vToAxis);
+	t = 1.0f - c;
+
+	if (s > 0)
+	{
+		// Normalize the rotation axis.
+		vAxis *= 1.0f / s;
+
+		tx = t * vAxis.x;	ty = t * vAxis.y;	tz = t * vAxis.z;
+		sx = s * vAxis.x;	sy = s * vAxis.y;	sz = s * vAxis.z;
+
+		return VMatrix(
+			tx * vAxis.x + c, tx * vAxis.y - sz, tx * vAxis.z + sy, 0.0f,
+			tx * vAxis.y + sz, ty * vAxis.y + c, ty * vAxis.z - sx, 0.0f,
+			tx * vAxis.z - sy, ty * vAxis.z + sx, tz * vAxis.z + c, 0.0f,
+			0.0f, 0.0f, 0.0f, 1.0f);
+	}
+	else
+	{
+		return SetupMatrixIdentity();
+	}
+}
+
 VMatrix SetupMatrixAngles(const QAngle& vAngles)
 {
 	VMatrix mRet;
@ -158,8 +196,19 @@ VMatrix SetupMatrixOrgAngles(const Vector3D& origin, const QAngle& vAngles)

 #endif // VECTOR_NO_SLOW_OPERATIONS

-
+#if 1
 bool PlaneIntersection(const VPlane& vp1, const VPlane& vp2, const VPlane& vp3, Vector3D& vOut)
+{
+	Vector3D v2Cross3 = CrossProduct(vp2.m_Normal, vp3.m_Normal);
+	float flDenom = DotProduct(vp1.m_Normal, v2Cross3);
+	if (fabs(flDenom) < FLT_EPSILON)
+		return false;
+	Vector3D vRet = vp1.m_Dist * v2Cross3 + vp2.m_Dist * CrossProduct(vp3.m_Normal, vp1.m_Normal) + vp3.m_Dist * CrossProduct(vp1.m_Normal, vp2.m_Normal);
+	vOut = vRet * (1.0 / flDenom);
+	return true;
+}
+#else  // old slow innaccurate code
+bool PlaneIntersection(const VPlane& vp1, const VPlane& vp2, const VPlane& vp3, Vector& vOut)
 {
 	VMatrix mMat, mInverse;

@ -169,7 +218,6 @@ bool PlaneIntersection(const VPlane& vp1, const VPlane& vp2, const VPlane& vp3,
 		vp3.m_Normal.x, vp3.m_Normal.y, vp3.m_Normal.z, -vp3.m_Dist,
 		0.0f, 0.0f, 0.0f, 1.0f
 	);
-
 	if (mMat.InverseGeneral(mInverse))
 	{
 		//vOut = mInverse * Vector(0.0f, 0.0f, 0.0f);
@ -181,7 +229,7 @@ bool PlaneIntersection(const VPlane& vp1, const VPlane& vp2, const VPlane& vp3,
 		return false;
 	}
 }
-
+#endif


 // ------------------------------------------------------------------------------------------- //
@ -303,7 +351,7 @@ bool MatrixInverseGeneral(const VMatrix& src, VMatrix& dst)
 	for (iRow = 0; iRow < 4; iRow++)
 	{
 		// Find the row with the largest element in this column.
-		fLargest = 0.00001f;
+		fLargest = 1e-6f;
 		iLargest = -1;
 		for (iTest = iRow; iTest < 4; iTest++)
 		{
@ -506,7 +554,7 @@ bool VMatrix::IsRotationMatrix() const
 		FloatMakePositive(v2.Dot(v3)) < 0.01f;
 }

-static void SetupMatrixAnglesInternal(vec_t m[4][4], const QAngle& vAngles)
+void VMatrix::SetupMatrixOrgAngles(const Vector3D& origin, const QAngle& vAngles)
 {
 	float		sr, sp, sy, cr, cp, cy;

@ -527,11 +575,6 @@ static void SetupMatrixAnglesInternal(vec_t m[4][4], const QAngle& vAngles)
 	m[0][3] = 0.f;
 	m[1][3] = 0.f;
 	m[2][3] = 0.f;
-}
-
-void VMatrix::SetupMatrixOrgAngles(const Vector3D& origin, const QAngle& vAngles)
-{
-	SetupMatrixAnglesInternal(m, vAngles);

 	// Add translation
 	m[0][3] = origin.x;
@ -544,21 +587,6 @@ void VMatrix::SetupMatrixOrgAngles(const Vector3D& origin, const QAngle& vAngles
 }


-void	VMatrix::SetupMatrixAngles(const QAngle& vAngles)
-{
-	SetupMatrixAnglesInternal(m, vAngles);
-
-	// Zero everything else
-	m[0][3] = 0.0f;
-	m[1][3] = 0.0f;
-	m[2][3] = 0.0f;
-	m[3][0] = 0.0f;
-	m[3][1] = 0.0f;
-	m[3][2] = 0.0f;
-	m[3][3] = 1.0f;
-}
-
-
 //-----------------------------------------------------------------------------
 // Sets matrix to identity
 //-----------------------------------------------------------------------------
@ -745,7 +773,7 @@ void Vector4DMultiplyPosition(const VMatrix& src1, Vector3D const& src2, Vector4
 {
 	// Make sure it works if src2 == dst
 	Vector3D tmp;
+	Vector3D const& v = (&src2 == &dst.AsVector3D()) ? static_cast<const Vector3D&>(tmp) : src2; // must cast to a reference: a by-value cast binds v to a copy of the still-uninitialized tmp
+	Vector3D const& v = (&src2 == &dst.AsVector3D()) ? static_cast<const Vector3D>(tmp) : src2;

 	if (&src2 == &dst.AsVector3D())
 	{
@ -768,7 +796,7 @@ void Vector3DMultiply(const VMatrix& src1, const Vector3D& src2, Vector3D& dst)
 {
 	// Make sure it works if src2 == dst
 	Vector3D tmp;
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D&>(tmp) : src2; // must cast to a reference: a by-value cast binds v to a copy of the still-uninitialized tmp
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D>(tmp) : src2;

 	if (&src2 == &dst)
 	{
@ -789,7 +817,7 @@ void Vector3DMultiplyPositionProjective(const VMatrix& src1, const Vector3D& src
 {
 	// Make sure it works if src2 == dst
 	Vector3D tmp;
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D&>(tmp) : src2; // must cast to a reference: a by-value cast binds v to a copy of the still-uninitialized tmp
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D>(tmp) : src2;
 	if (&src2 == &dst)
 	{
 		VectorCopy(src2, tmp);
@ -816,7 +844,7 @@ void Vector3DMultiplyProjective(const VMatrix& src1, const Vector3D& src2, Vecto
 {
 	// Make sure it works if src2 == dst
 	Vector3D tmp;
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D&>(tmp) : src2; // must cast to a reference: a by-value cast binds v to a copy of the still-uninitialized tmp
+	const Vector3D& v = (&src2 == &dst) ? static_cast<const Vector3D>(tmp) : src2;
 	if (&src2 == &dst)
 	{
 		VectorCopy(src2, tmp);
@ -869,7 +897,7 @@ void Vector3DMultiplyTranspose(const VMatrix& src1, const Vector3D& src2, Vector
 	bool srcEqualsDst = (&src2 == &dst);

 	Vector3D tmp;
+	const Vector3D& v = srcEqualsDst ? static_cast<const Vector3D&>(tmp) : src2; // must cast to a reference: a by-value cast binds v to a copy of the still-uninitialized tmp
+	const Vector3D& v = srcEqualsDst ? static_cast<const Vector3D>(tmp) : src2;

 	if (srcEqualsDst)
 	{
@ -954,7 +982,7 @@ void MatrixBuildTranslation(VMatrix& dst, const Vector3D& translation)
 //-----------------------------------------------------------------------------
 void MatrixBuildRotationAboutAxis(VMatrix& dst, const Vector3D& vAxisOfRot, float angleDegrees)
 {
-	MatrixBuildRotationAboutAxis(vAxisOfRot, angleDegrees, const_cast<matrix3x4_t&> (dst.As3x4()));
+	MatrixBuildRotationAboutAxis(vAxisOfRot, angleDegrees, dst.As3x4());
 	dst[3][0] = 0;
 	dst[3][1] = 0;
 	dst[3][2] = 0;
@ -1006,6 +1034,13 @@ void MatrixBuildRotation(VMatrix& dst, const Vector3D& initialDirection, const V
 	}

 	MatrixBuildRotationAboutAxis(dst, axis, angle);
+
+#ifdef _DEBUG
+	Vector3D test;
+	Vector3DMultiply(dst, initialDirection, test);
+	test -= finalDirection;
+	Assert(test.LengthSqr() < 1e-3);
+#endif
 }

 //-----------------------------------------------------------------------------
@ -1163,8 +1198,7 @@ void CalculateSphereFromProjectionMatrix(const VMatrix& worldToVolume, Vector3D*
 }


-static inline void FrustumPlanesFromMatrixHelper(const VMatrix& shadowToWorld, const Vector3D& p1, const Vector3D& p2, const Vector3D& p3,
-	Vector3D& normal, float& dist)
+static inline void FrustumPlanesFromMatrixHelper(const VMatrix& shadowToWorld, const Vector3D& p1, const Vector3D& p2, const Vector3D& p3, VPlane& plane)
 {
 	Vector3D world1, world2, world3;
 	Vector3DMultiplyPositionProjective(shadowToWorld, p1, world1);
@ -1175,41 +1209,37 @@ static inline void FrustumPlanesFromMatrixHelper(const VMatrix& shadowToWorld, c
 	VectorSubtract(world2, world1, v1);
 	VectorSubtract(world3, world1, v2);

-	CrossProduct(v1, v2, normal);
-	VectorNormalize(normal);
-	dist = DotProduct(normal, world1);
+	CrossProduct(v1, v2, plane.m_Normal);
+	VectorNormalize(plane.m_Normal);
+	plane.m_Dist = DotProduct(plane.m_Normal, world1);
 }

 void FrustumPlanesFromMatrix(const VMatrix& clipToWorld, Frustum_t& frustum)
 {
-	Vector3D normal;
-	float dist;
+	VPlane planes[6];

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(0.0f, 0.0f, 0.0f), Vector3D(1.0f, 0.0f, 0.0f), Vector3D(0.0f, 1.0f, 0.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_NEARZ, PLANE_ANYZ, normal, dist);
+		Vector3D(0.0f, 0.0f, 0.0f), Vector3D(1.0f, 0.0f, 0.0f), Vector3D(0.0f, 1.0f, 0.0f), planes[FRUSTUM_NEARZ]);

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(0.0f, 0.0f, 1.0f), Vector3D(0.0f, 1.0f, 1.0f), Vector3D(1.0f, 0.0f, 1.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_FARZ, PLANE_ANYZ, normal, dist);
+		Vector3D(0.0f, 0.0f, 1.0f), Vector3D(0.0f, 1.0f, 1.0f), Vector3D(1.0f, 0.0f, 1.0f), planes[FRUSTUM_FARZ]);

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(1.0f, 0.0f, 0.0f), Vector3D(1.0f, 1.0f, 1.0f), Vector3D(1.0f, 1.0f, 0.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_RIGHT, PLANE_ANYZ, normal, dist);
+		Vector3D(1.0f, 0.0f, 0.0f), Vector3D(1.0f, 1.0f, 1.0f), Vector3D(1.0f, 1.0f, 0.0f), planes[FRUSTUM_RIGHT]);

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(0.0f, 0.0f, 0.0f), Vector3D(0.0f, 1.0f, 1.0f), Vector3D(0.0f, 0.0f, 1.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_LEFT, PLANE_ANYZ, normal, dist);
+		Vector3D(0.0f, 0.0f, 0.0f), Vector3D(0.0f, 1.0f, 1.0f), Vector3D(0.0f, 0.0f, 1.0f), planes[FRUSTUM_LEFT]);

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(1.0f, 1.0f, 0.0f), Vector3D(1.0f, 1.0f, 1.0f), Vector3D(0.0f, 1.0f, 1.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_TOP, PLANE_ANYZ, normal, dist);
+		Vector3D(1.0f, 1.0f, 0.0f), Vector3D(1.0f, 1.0f, 1.0f), Vector3D(0.0f, 1.0f, 1.0f), planes[FRUSTUM_TOP]);

 	FrustumPlanesFromMatrixHelper(clipToWorld,
-		Vector3D(1.0f, 0.0f, 0.0f), Vector3D(0.0f, 0.0f, 1.0f), Vector3D(1.0f, 0.0f, 1.0f), normal, dist);
-	frustum.SetPlane(FRUSTUM_BOTTOM, PLANE_ANYZ, normal, dist);
+		Vector3D(1.0f, 0.0f, 0.0f), Vector3D(0.0f, 0.0f, 1.0f), Vector3D(1.0f, 0.0f, 1.0f), planes[FRUSTUM_BOTTOM]);
+
+	frustum.SetPlanes(planes);
 }

+// BEWARE: top/bottom are FLIPPED relative to D3DXMatrixOrthoOffCenterRH().
 void MatrixBuildOrtho(VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar)
 {
 	// FIXME: This is being used incorrectly! Should read:
@ -1243,29 +1273,19 @@ void MatrixBuildOrtho(VMatrix& dst, double left, double top, double right, doubl
 		0.0f, 0.0f, 0.0f, 1.0f);
 }

-void MatrixBuildPerspectiveZRange(VMatrix& dst, double flZNear, double flZFar)
-{
-	dst.m[2][0] = 0.0f;
-	dst.m[2][1] = 0.0f;
-	dst.m[2][2] = flZFar / (flZNear - flZFar);
-	dst.m[2][3] = flZNear * flZFar / (flZNear - flZFar);
-}
-
 void MatrixBuildPerspectiveX(VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar)
 {
-	float flWidthScale = 1.0f / tanf(flFovX * M_PI / 360.0f);
-	float flHeightScale = flAspect * flWidthScale;
-	dst.Init(flWidthScale, 0.0f, 0.0f, 0.0f,
-		0.0f, flHeightScale, 0.0f, 0.0f,
-		0.0f, 0.0f, 0.0f, 0.0f,
+	float flWidth = 2.0f * flZNear * tanf(flFovX * M_PI / 360.0f);
+	float flHeight = flWidth / flAspect;
+	dst.Init(2.0f * flZNear / flWidth, 0.0f, 0.0f, 0.0f,
+		0.0f, 2.0f * flZNear / flHeight, 0.0f, 0.0f,
+		0.0f, 0.0f, flZFar / (flZNear - flZFar), flZNear * flZFar / (flZNear - flZFar),
 		0.0f, 0.0f, -1.0f, 0.0f);
-
-	MatrixBuildPerspectiveZRange(dst, flZNear, flZFar);
 }

 void MatrixBuildPerspectiveOffCenterX(VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right)
 {
-	float flWidth = tanf(flFovX * M_PI / 360.0f);
+	float flWidth = 2.0f * flZNear * tanf(flFovX * M_PI / 360.0f);
 	float flHeight = flWidth / flAspect;

 	// bottom, top, left, right are 0..1 so convert to -<val>/2..<val>/2
@ -1274,12 +1294,58 @@ void MatrixBuildPerspectiveOffCenterX(VMatrix& dst, double flFovX, double flAspe
 	float flBottom = -(flHeight / 2.0f) * (1.0f - bottom) + bottom * (flHeight / 2.0f);
 	float flTop = -(flHeight / 2.0f) * (1.0f - top) + top * (flHeight / 2.0f);

-	dst.Init(1.0f / (flRight - flLeft), 0.0f, (flLeft + flRight) / (flRight - flLeft), 0.0f,
-		0.0f, 1.0f / (flTop - flBottom), (flTop + flBottom) / (flTop - flBottom), 0.0f,
-		0.0f, 0.0f, 0.0f, 0.0f,
+	dst.Init((2.0f * flZNear) / (flRight - flLeft), 0.0f, (flLeft + flRight) / (flRight - flLeft), 0.0f,
+		0.0f, 2.0f * flZNear / (flTop - flBottom), (flTop + flBottom) / (flTop - flBottom), 0.0f,
+		0.0f, 0.0f, flZFar / (flZNear - flZFar), flZNear * flZFar / (flZNear - flZFar),
 		0.0f, 0.0f, -1.0f, 0.0f);
-
-	MatrixBuildPerspectiveZRange(dst, flZNear, flZFar);
 }
-#endif // !_STATIC_LINKED || _SHARED_LIB

+//-----------------------------------------------------------------------------
+// Purpose: derives the six frustum planes from the rows of a (non-transposed)
+//          view projection matrix and normalizes each non-degenerate plane.
+// Input  : viewProjMatrix    - matrix whose rows are combined into planes
+//          pPlanesOut        - receives FRUSTUM_NUMPLANES planes, indexed by
+//                              the FRUSTUM_* constants
+//          bD3DClippingRange - true: near plane is row 2 alone ([0,1] depth);
+//                              false: near plane is row 2 + row 3 ([-1,1] depth)
+//-----------------------------------------------------------------------------
+void ExtractClipPlanesFromNonTransposedMatrix(const VMatrix& viewProjMatrix, VPlane* pPlanesOut, bool bD3DClippingRange)
+{
+	// Read every row once up front; each plane below is a signed sum of two rows.
+	Vector4D vRowX = MatrixGetRowAsVector4D(viewProjMatrix, 0);
+	Vector4D vRowY = MatrixGetRowAsVector4D(viewProjMatrix, 1);
+	Vector4D vRowZ = MatrixGetRowAsVector4D(viewProjMatrix, 2);
+	Vector4D vRowW = MatrixGetRowAsVector4D(viewProjMatrix, 3);
+
+	// Side planes: +/- row 0 and +/- row 1, each added to row 3.
+	Vector4D vEquation = vRowX + vRowW;
+	pPlanesOut[FRUSTUM_LEFT].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = -vRowX + vRowW;
+	pPlanesOut[FRUSTUM_RIGHT].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = vRowY + vRowW;
+	pPlanesOut[FRUSTUM_BOTTOM].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = -vRowY + vRowW;
+	pPlanesOut[FRUSTUM_TOP].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	// Near plane depends on the depth convention of the projection.
+	if (!bD3DClippingRange)
+	{
+		// [-1,1] Z clipping range (OpenGL-style)
+		vEquation = vRowZ + vRowW;
+	}
+	else
+	{
+		// [0,1] Z clipping range (D3D-style)
+		vEquation = vRowZ;
+	}
+	pPlanesOut[FRUSTUM_NEARZ].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	// Far plane is the same for both conventions.
+	vEquation = -vRowZ + vRowW;
+	pPlanesOut[FRUSTUM_FARZ].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	// Scale normal and distance together so every plane ends up unit length;
+	// planes with a zero-length normal are left untouched.
+	for (uint nPlane = 0; nPlane < FRUSTUM_NUMPLANES; ++nPlane)
+	{
+		VPlane& plane = pPlanesOut[nPlane];
+		float flLenSqr = plane.m_Normal.x * plane.m_Normal.x + plane.m_Normal.y * plane.m_Normal.y + plane.m_Normal.z * plane.m_Normal.z;
+		if (flLenSqr == 0.0f)
+		{
+			continue;
+		}
+
+		float flInvLen = 1.0f / sqrt(flLenSqr);
+		plane.m_Normal *= flInvLen;
+		plane.m_Dist *= flInvLen;
+	}
+}
+
+#endif // !_STATIC_LINKED || _SHARED_LIB
--- a/r5dev/mathlib/vmatrix.h
+++ b/r5dev/mathlib/vmatrix.h
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -54,10 +54,9 @@ public:
 	// Creates a matrix where the X axis = forward
 	// the Y axis = left, and the Z axis = up
 	VMatrix(const Vector3D& forward, const Vector3D& left, const Vector3D& up);
-	VMatrix(const Vector3D& forward, const Vector3D& left, const Vector3D& up, const Vector3D& translation);

 	// Construct from a 3x4 matrix
-	VMatrix(const matrix3x4_t& matrix3x4);
+	explicit VMatrix(const matrix3x4_t& matrix3x4);

 	// Set the values in the matrix.
 	void		Init(
@ -107,6 +106,7 @@ public:
 	void		PreTranslate(const Vector3D& vTrans);
 	void		PostTranslate(const Vector3D& vTrans);

+	matrix3x4_t& As3x4();
 	const matrix3x4_t& As3x4() const;
 	void		CopyFrom3x4(const matrix3x4_t& m3x4);
 	void		Set3x4(matrix3x4_t& matrix3x4) const;
@ -199,9 +199,6 @@ public:
 	// Setup a matrix for origin and angles.
 	void		SetupMatrixOrgAngles(const Vector3D& origin, const QAngle& vAngles);

-	// Setup a matrix for angles and no translation.
-	void		SetupMatrixAngles(const QAngle& vAngles);
-
 	// General inverse. This may fail so check the return!
 	bool		InverseGeneral(VMatrix& vInverse) const;

@ -217,7 +214,7 @@ public:
 	VMatrix		InverseTR() const;

 	// Get the scale of the matrix's basis vectors.
-	Vector3D	GetScale() const;
+	Vector3D		GetScale() const;

 	// (Fast) multiply by a scaling matrix setup from vScale.
 	VMatrix		Scale(const Vector3D& vScale);
@ -263,6 +260,9 @@ VMatrix		SetupMatrixProjection(const Vector3D& vOrigin, const VPlane& thePlane);
 // Setup a matrix to rotate the specified amount around the specified axis.
 VMatrix		SetupMatrixAxisRot(const Vector3D& vAxis, vec_t fDegrees);

+// Setup a matrix to rotate one axis onto another. Input vectors must be normalized.
+VMatrix		SetupMatrixAxisToAxisRot(const Vector3D& vFromAxis, const Vector3D& vToAxis);
+
 // Setup a matrix from euler angles. Just sets identity and calls MatrixAngles.
 VMatrix		SetupMatrixAngles(const QAngle& vAngles);

@ -460,16 +460,6 @@ inline VMatrix::VMatrix(const Vector3D& xAxis, const Vector3D& yAxis, const Vect
 	);
 }

-inline VMatrix::VMatrix(const Vector3D& xAxis, const Vector3D& yAxis, const Vector3D& zAxis, const Vector3D& translation)
-{
-	Init(
-		xAxis.x, yAxis.x, zAxis.x, translation.x,
-		xAxis.y, yAxis.y, zAxis.y, translation.y,
-		xAxis.z, yAxis.z, zAxis.z, translation.z,
-		0.0f, 0.0f, 0.0f, 1.0f
-	);
-}
-

 inline void VMatrix::Init(
 	vec_t m00, vec_t m01, vec_t m02, vec_t m03,
@ -629,6 +619,11 @@ inline const matrix3x4_t& VMatrix::As3x4() const
 	return *((const matrix3x4_t*)this);
 }

+// Mutable counterpart of the const As3x4(): reinterprets this object's
+// storage as a matrix3x4_t in place, without copying.
+inline matrix3x4_t& VMatrix::As3x4()
+{
+	matrix3x4_t* pAs3x4 = reinterpret_cast<matrix3x4_t*>(this);
+	return *pAs3x4;
+}
+
 inline void VMatrix::CopyFrom3x4(const matrix3x4_t& m3x4)
 {
 	memcpy(m, m3x4.Base(), sizeof(matrix3x4_t));
@ -691,7 +686,7 @@ inline VMatrix VMatrix::operator-() const
 	VMatrix ret;
 	for (int i = 0; i < 16; i++)
 	{
-		((float*)ret.m)[i] = ((float*)m)[i];
+		((float*)ret.m)[i] = -((float*)m)[i];
 	}
 	return ret;
 }
@ -908,9 +903,9 @@ inline bool MatricesAreEqual(const VMatrix& src1, const VMatrix& src2, float flT
 //
 //-----------------------------------------------------------------------------
 void MatrixBuildOrtho(VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar);
+void MatrixBuildOrthoLH(VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar);
 void MatrixBuildPerspectiveX(VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar);
 void MatrixBuildPerspectiveOffCenterX(VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar, double bottom, double top, double left, double right);
-void MatrixBuildPerspectiveZRange(VMatrix& dst, double flZNear, double flZFar);

 inline void MatrixOrtho(VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar)
 {
@ -922,6 +917,16 @@ inline void MatrixOrtho(VMatrix& dst, double left, double top, double right, dou
 	dst = temp;
 }

+inline void MatrixBuildOrthoLH(VMatrix& dst, double left, double top, double right, double bottom, double zNear, double zFar)
+{
+	// Same as XMMatrixOrthographicOffCenterLH().
+	// Per-axis scale terms (diagonal) ...
+	const double flScaleX = 2.0f / (right - left);
+	const double flScaleY = 2.0f / (bottom - top);
+	const double flScaleZ = 1.0f / (zFar - zNear);
+
+	// ... and per-axis offset terms (last column).
+	const double flOffsetX = (left + right) / (left - right);
+	const double flOffsetY = (bottom + top) / (top - bottom);
+	const double flOffsetZ = zNear / (zNear - zFar);
+
+	dst.Init(
+		flScaleX, 0.0f, 0.0f, flOffsetX,
+		0.0f, flScaleY, 0.0f, flOffsetY,
+		0.0f, 0.0f, flScaleZ, flOffsetZ,
+		0.0f, 0.0f, 0.0f, 1.0f);
+}
+
 inline void MatrixPerspectiveX(VMatrix& dst, double flFovX, double flAspect, double flZNear, double flZFar)
 {
 	VMatrix mat;
@ -942,6 +947,61 @@ inline void MatrixPerspectiveOffCenterX(VMatrix& dst, double flFovX, double flAs
 	dst = temp;
 }

+// Gathers column nCol of mMatrix (rows 0..3, top to bottom) into a Vector4D.
+// nCol is not range-checked here.
+inline Vector4D GetMatrixColumnAsVector4D(const VMatrix& mMatrix, int nCol)
+{
+	Vector4D vColumn;
+
+	vColumn.x = mMatrix.m[0][nCol];
+	vColumn.y = mMatrix.m[1][nCol];
+	vColumn.z = mMatrix.m[2][nCol];
+	vColumn.w = mMatrix.m[3][nCol];
+
+	return vColumn;
+}
+
+// Returns row nRow of src as a Vector4D built from that row's storage.
+// nRow must be one of 0..3 (asserted).
+inline Vector4D MatrixGetRowAsVector4D(const VMatrix& src, int nRow)
+{
+	Assert(nRow >= 0 && nRow < 4);
+
+	auto pRow = src[nRow];
+	return Vector4D(pRow);
+}
+
+//-----------------------------------------------------------------------------
+// Extracts the six frustum clip planes from an arbitrary view projection
+// matrix. The matrix is assumed to have been transposed already, so the plane
+// terms are gathered from its columns. The planes are stored exactly as
+// computed; this function performs no normalization step.
+//-----------------------------------------------------------------------------
+inline void ExtractClipPlanesFromTransposedMatrix(const VMatrix& transposedViewProjMatrix, VPlane* pPlanesOut)
+{
+	// Gather each column once; every plane is column 3 plus or minus another column.
+	Vector4D vColX = GetMatrixColumnAsVector4D(transposedViewProjMatrix, 0);
+	Vector4D vColY = GetMatrixColumnAsVector4D(transposedViewProjMatrix, 1);
+	Vector4D vColZ = GetMatrixColumnAsVector4D(transposedViewProjMatrix, 2);
+	Vector4D vColW = GetMatrixColumnAsVector4D(transposedViewProjMatrix, 3);
+
+	// Left / right
+	Vector4D vEquation = vColX + vColW;
+	pPlanesOut[FRUSTUM_LEFT].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = -vColX + vColW;
+	pPlanesOut[FRUSTUM_RIGHT].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	// Bottom / top
+	vEquation = vColY + vColW;
+	pPlanesOut[FRUSTUM_BOTTOM].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = -vColY + vColW;
+	pPlanesOut[FRUSTUM_TOP].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	// Near / far
+	vEquation = vColZ + vColW;
+	pPlanesOut[FRUSTUM_NEARZ].Init(vEquation.AsVector3D(), -vEquation.w);
+
+	vEquation = -vColZ + vColW;
+	pPlanesOut[FRUSTUM_FARZ].Init(vEquation.AsVector3D(), -vEquation.w);
+}
+
+//-----------------------------------------------------------------------------
+// Extracts clip planes from an arbitrary view projection matrix.
+// Differences from ExtractClipPlanesFromTransposedMatrix():
+// This function assumes the matrix has NOT been transposed.
+// If bD3DClippingRange is true, the projection space clipping range is assumed
+// to be [0,1], vs. the OpenGL range [-1,1].
+// This function always returns normalized planes.
+//-----------------------------------------------------------------------------
+void ExtractClipPlanesFromNonTransposedMatrix(const VMatrix& viewProjMatrix, VPlane* pPlanesOut, bool bD3DClippingRange = true);
+
 #endif
-
-
--- a/r5dev/mathlib/vplane.h
+++ b/r5dev/mathlib/vplane.h
@ -1,4 +1,4 @@
-//========= Copyright Valve Corporation, All rights reserved. ============//
+//========= Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ============//
 //
 // Purpose: 
 //
@ -25,7 +25,6 @@ typedef int SideType;

 #define VP_EPSILON	0.01f

-
 class VPlane
 {
 public:
@ -63,7 +62,7 @@ public:
 #endif

 public:
-	Vector3D	m_Normal;
+	Vector3D		m_Normal;
 	vec_t		m_Dist;

 #ifdef VECTOR_NO_SLOW_OPERATIONS
@ -176,7 +175,4 @@ inline SideType VPlane::BoxOnPlaneSide(const Vector3D& vMin, const Vector3D& vMa
 	return firstSide;
 }

-
-
-
 #endif // VPLANE_H
--- a/r5dev/tier0/basetypes.h
+++ b/r5dev/tier0/basetypes.h
@ -154,6 +154,78 @@
 #define MAX( a, b ) ( ( ( a ) > ( b ) ) ? ( a ) : ( b ) )
 #endif

+#ifdef __cplusplus
+
+// Mixed-type clamp: limits val to [minVal, maxVal] and returns the result as T.
+// The lower bound is tested first, so it wins if the bounds are inverted.
+template< class T, class Y, class X >
+inline T clamp(T const& val, Y const& minVal, X const& maxVal)
+{
+	if (val < minVal)
+	{
+		return minVal;
+	}
+
+	if (val > maxVal)
+	{
+		return maxVal;
+	}
+
+	return val;
+}
+
+// Preferred clamp: all three arguments share one type. The clamp macro can
+// have unexpected side-effects or generate more expensive code, and even the
+// lower-case clamp() function can cost more because of its mixed types.
+template< class T >
+T Clamp(T const& val, T const& minVal, T const& maxVal)
+{
+	if (val < minVal)
+	{
+		return minVal;
+	}
+
+	if (val > maxVal)
+	{
+		return maxVal;
+	}
+
+	return val;
+}
+
+// Preferred Min: unlike the MIN macro, each argument is evaluated exactly once.
+// Returns val1 only when it compares strictly less than val2.
+template< class T >
+T Min(T const& val1, T const& val2)
+{
+	if (val1 < val2)
+	{
+		return val1;
+	}
+
+	return val2;
+}
+
+// Preferred Max: unlike the MAX macro, each argument is evaluated exactly once.
+// Returns val1 only when it compares strictly greater than val2.
+template< class T >
+T Max(T const& val1, T const& val2)
+{
+	if (val1 > val2)
+	{
+		return val1;
+	}
+
+	return val2;
+}
+
+// Exchanges the values of a and b through a temporary copy.
+template <typename T>
+void Swap(T& a, T& b)
+{
+	T saved = a;
+
+	a = b;
+	b = saved;
+}
+
+#else
+
+#define clamp(val, min, max) (((val) > (max)) ? (max) : (((val) < (min)) ? (min) : (val)))
+
+#endif
+
+#define fsel(c,x,y) ( (c) >= 0 ? (x) : (y) )
+
+// integer conditional move
+// if a >= 0, return x, else y
+#define isel(a,x,y) ( ((a) >= 0) ? (x) : (y) )
+
+// if x == y, return a, else b
+#define ieqsel(x,y,a,b) (( (x) == (y) ) ? (a) : (b))
+
+// if the nth bit of a is set (counting with 0 = LSB),
+// return x, else y
+// this is fast if nbit is a compile-time immediate 
+#define ibitsel(a, nbit, x, y) ( ( ((a) & (1 << (nbit))) != 0 ) ? (x) : (y) )
+
 // MSVC CRT uses 0x7fff while gcc uses MAX_INT, leading to mismatches between platforms
 // As a result, we pick the least common denominator here.  This should be used anywhere
 // you might typically want to use RAND_MAX
--- a/r5dev/tier0/dbg.cpp
+++ b/r5dev/tier0/dbg.cpp
@ -8,9 +8,10 @@

 #include "core/stdafx.h"
 #include "core/logdef.h"
+#include "tier0/dbg.h"
 #include "tier0/platform.h"
 #include "tier0/threadtools.h"
-#include "tier0/dbg.h"
+#include <tier0/commandline.h>
 #ifndef DEDICATED
 #include "vgui/vgui_debugpanel.h"
 #include "gameui/IConsole.h"
@ -28,7 +29,12 @@ std::mutex s_LogMutex;
 //-----------------------------------------------------------------------------
 bool HushAsserts()
 {
+#ifdef DBGFLAG_ASSERT
+	// When DBGFLAG_ASSERT is defined, asserts are hushed only if "-hushasserts"
+	// is present on the command line. The lookup runs once: the result is kept
+	// in a function-local static and reused on every later call.
+	static bool s_bHushAsserts = !!CommandLine()->FindParm("-hushasserts");
+	return s_bHushAsserts;
+#else
+	// Without DBGFLAG_ASSERT this always reports asserts as hushed.
 	return true;
+#endif
 }

 //-----------------------------------------------------------------------------
--- a/r5dev/tier0/dbg.h
+++ b/r5dev/tier0/dbg.h
@ -8,6 +8,7 @@
 #ifndef DBG_H
 #define DBG_H
 #define Assert assert
+#define AssertDbg assert
 #include "tier0/dbgflag.h"

 bool HushAsserts();
--- a/r5dev/tier0/platform.h
+++ b/r5dev/tier0/platform.h
@ -141,6 +141,12 @@
 #define IS_WINDOWS_PC 1
 #endif

+#if _MSC_VER >= 1800
+#define	VECTORCALL __vectorcall 
+#else 
+#define	VECTORCALL 
+#endif
+
 #endif // CROSS_PLATFORM_VERSION < 2

 #if defined( GNUC )	&& !defined( COMPILER_PS3 ) // use pre-align on PS3
@ -282,6 +288,8 @@ inline int64 CastPtrToInt64(const void* p)

 #endif

+#define NO_MALLOC_OVERRIDE
+
 //-----------------------------------------------------------------------------
 // Various compiler-specific keywords
 //-----------------------------------------------------------------------------
--- a/r5dev/tier0/threadtools.cpp
+++ b/r5dev/tier0/threadtools.cpp
@ -0,0 +1,31 @@
+//===== Copyright (c) 1996-2005, Valve Corporation, All rights reserved. ======//
+//
+// Purpose: Interlocked (atomic) compare-and-exchange helpers for thread tools
+//
+// $Workfile: $
+// $NoKeywords: $
+//===========================================================================//
+
+#include "core/stdafx.h"
+#include "threadtools.h"
+
+//-----------------------------------------------------------------------------
+// Purpose: atomically stores 'value' into *pDest, but only if *pDest currently
+//          equals 'comperand'.
+// Input  : pDest     - destination to compare and conditionally overwrite
+//          value     - new value to store on a match (narrowed to LONG)
+//          comperand - value *pDest is expected to hold (narrowed to LONG)
+// Output : the value *pDest held before the operation
+//-----------------------------------------------------------------------------
+LONG ThreadInterlockedCompareExchange64(LONG volatile* pDest, int64 value, int64 comperand)
+{
+	// The intrinsic's parameter order is (destination, exchange, comparand);
+	// passing the comperand as the exchange value would invert the operation.
+	return _InterlockedCompareExchange(pDest, static_cast<LONG>(value), static_cast<LONG>(comperand));
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: atomically assigns 'value' to *p if *p currently equals 'comperand'.
+// Input  : p         - 4-byte aligned destination (asserted)
+//          value     - new value to store on a match
+//          comperand - value *p is expected to hold
+// Output : true if the assignment took place, false if *p held something else
+//-----------------------------------------------------------------------------
+bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand)
+{
+	Assert((size_t)p % 4 == 0);
+
+	// The intrinsic takes (destination, exchange, comparand) and returns the
+	// previous contents of *p; the exchange happened only if that previous
+	// value is the comperand. Returning the raw previous value would report
+	// "success" for any non-zero content.
+	return _InterlockedCompareExchange(p, value, comperand) == comperand;
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: 64-bit atomic compare-and-exchange. Stores 'value' into *pDest only
+//          if *pDest currently equals 'comperand'.
+// Input  : pDest     - destination to compare and conditionally overwrite
+//          value     - new value to store on a match
+//          comperand - value *pDest is expected to hold
+// Output : the value *pDest held before the operation
+//-----------------------------------------------------------------------------
+int64 ThreadInterlockedCompareExchange64(int64 volatile* pDest, int64 value, int64 comperand)
+{
+	// Parameter order of the intrinsic is (destination, exchange, comparand).
+	return _InterlockedCompareExchange64(pDest, value, comperand);
+}
+
+//-----------------------------------------------------------------------------
+// Purpose: 64-bit variant of the conditional atomic assign. Stores 'value'
+//          into *pDest if *pDest currently equals 'comperand'.
+// Input  : pDest     - destination to compare and conditionally overwrite
+//          value     - new value to store on a match
+//          comperand - value *pDest is expected to hold
+// Output : true if the assignment took place, false otherwise
+//-----------------------------------------------------------------------------
+bool ThreadInterlockedAssignIf64(int64 volatile* pDest, int64 value, int64 comperand)
+{
+	// The intrinsic takes (destination, exchange, comparand) and returns the
+	// previous contents; success means that previous value was the comperand.
+	return _InterlockedCompareExchange64(pDest, value, comperand) == comperand;
+}
--- a/r5dev/tier0/threadtools.h
+++ b/r5dev/tier0/threadtools.h
@ -1,11 +1,6 @@
 #ifndef THREADTOOLS_H
 #define THREADTOOLS_H

-inline bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand)
-{
-	Assert((size_t)p % 4 == 0);
-	return _InterlockedCompareExchange(p, comperand, value);
-}
 inline void ThreadSleep(unsigned nMilliseconds)
 {
 #ifdef _WIN32
@ -38,6 +33,169 @@ inline void ThreadSleep(unsigned nMilliseconds)
 	usleep(nMilliseconds * 1000);
 #endif
 }
+// Issues a short processor-level pause/yield hint, intended for use inside
+// spin-wait loops. The implementation is chosen at compile time from the
+// COMPILER_* macros; a build for a compiler that matches none of them is
+// rejected by the #error below.
+inline void ThreadPause()
+{
+#if defined( COMPILER_PS3 )
+	__db16cyc();
+#elif defined( COMPILER_GCC )
+	// x86 'pause' instruction via GCC inline assembly.
+	__asm __volatile("pause");
+#elif defined ( COMPILER_MSVC64 )
+	// 64-bit MSVC path: uses the intrinsic form of 'pause'.
+	_mm_pause();
+#elif defined( COMPILER_MSVC32 )
+	__asm pause;
+#elif defined( COMPILER_MSVCX360 )
+	YieldProcessor();
+	__asm { or r0, r0, r0 }
+	YieldProcessor();
+	__asm { or r1, r1, r1 }
+#else
+#error "implement me"
+#endif
+}
+LONG ThreadInterlockedCompareExchange64(LONG volatile* pDest, int64 value, int64 comperand);
+bool ThreadInterlockedAssignIf(LONG volatile* p, int32 value, int32 comperand);
+int64 ThreadInterlockedCompareExchange64(int64 volatile* pDest, int64 value, int64 comperand);
+bool ThreadInterlockedAssignIf64(int64 volatile* pDest, int64 value, int64 comperand);
+
+//-----------------------------------------------------------------------------
+//
+// Interlock methods. These perform very fast atomic thread
+// safe operations. These are especially relevant in a multi-core setting.
+//
+//-----------------------------------------------------------------------------
+
+#ifdef _WIN32
+#define NOINLINE
+#elif defined( _PS3 )
+#define NOINLINE __attribute__ ((noinline))
+#elif defined(POSIX)
+#define NOINLINE __attribute__ ((noinline))
+#endif
+
+#if defined( _X360 ) || defined( _PS3 )
+#define ThreadMemoryBarrier() __lwsync()
+#elif defined(COMPILER_MSVC)
+// Prevent compiler reordering across this barrier. This is
+// sufficient for most purposes on x86/x64.
+#define ThreadMemoryBarrier() _ReadWriteBarrier()
+#elif defined(COMPILER_GCC)
+// Prevent compiler reordering across this barrier. This is
+// sufficient for most purposes on x86/x64.
+// http://preshing.com/20120625/memory-ordering-at-compile-time
+#define ThreadMemoryBarrier() asm volatile("" ::: "memory")
+#else
+#error Every platform needs to define ThreadMemoryBarrier to at least prevent compiler reordering
+#endif
+
+//-----------------------------------------------------------------------------
+//
+// A super-fast thread-safe integer. A simple class encapsulating the notion of an
+// atomic integer used across threads that uses the built in and faster 
+// "interlocked" functionality rather than a full-blown mutex. Useful for simple 
+// things like reference counts, etc.
+//
+//-----------------------------------------------------------------------------
+
+// Integer wrapper whose modifying operations go through the ThreadInterlocked*
+// helpers instead of a mutex. T must be the size of int32 or int64; each
+// operation picks the 32- or 64-bit helper by comparing sizeof(T).
+template <typename T>
+class CInterlockedIntT
+{
+public:
+	// Zero-initializes the value. The size check lives in this constructor's body.
+	CInterlockedIntT() : m_value(0) { static_assert((sizeof(T) == sizeof(int32)) || (sizeof(T) == sizeof(int64))); }
+
+	CInterlockedIntT(T value) : m_value(value) {}
+
+	// Plain reads of the current value (no interlocked call involved).
+	T operator()(void) const { return m_value; }
+	operator T() const { return m_value; }
+
+	bool operator!() const { return (m_value == 0); }
+	bool operator==(T rhs) const { return (m_value == rhs); }
+	bool operator!=(T rhs) const { return (m_value != rhs); }
+
+	// Pre-increment: returns the result of the interlocked increment helper.
+	T operator++() {
+		if (sizeof(T) == sizeof(int32))
+			return (T)ThreadInterlockedIncrement((int32*)&m_value);
+		else
+			return (T)ThreadInterlockedIncrement64((int64*)&m_value);
+	}
+	// Post-increment: derived from the pre-increment result minus one.
+	T operator++(int) { return operator++() - 1; }
+
+	// Pre-decrement: returns the result of the interlocked decrement helper.
+	T operator--() {
+		if (sizeof(T) == sizeof(int32))
+			return (T)ThreadInterlockedDecrement((int32*)&m_value);
+		else
+			return (T)ThreadInterlockedDecrement64((int64*)&m_value);
+	}
+
+	// Post-decrement: derived from the pre-decrement result plus one.
+	T operator--(int) { return operator--() + 1; }
+
+	// Conditional assign: forwards to ThreadInterlockedAssignIf[64] with
+	// newValue as the value to store and conditionValue as the comperand, and
+	// returns that helper's result. operator*= and operator/= retry until this
+	// returns true.
+	bool AssignIf(T conditionValue, T newValue)
+	{
+		if (sizeof(T) == sizeof(int32))
+			return ThreadInterlockedAssignIf((LONG*)&m_value, (int32)newValue, (int32)conditionValue);
+		else
+			return ThreadInterlockedAssignIf64((int64*)&m_value, (int64)newValue, (int64)conditionValue);
+	}
+
+
+	// Stores newValue through the interlocked exchange helper.
+	// NOTE(review): the return statement re-reads m_value after the exchange,
+	// so the returned value is whatever m_value holds at that later read.
+	T operator=(T newValue) {
+		if (sizeof(T) == sizeof(int32))
+			ThreadInterlockedExchange((int32*)&m_value, newValue);
+		else
+			ThreadInterlockedExchange64((int64*)&m_value, newValue);
+		return m_value;
+	}
+
+	// Atomic add is like += except it returns the previous value as its return value
+	T AtomicAdd(T add) {
+		if (sizeof(T) == sizeof(int32))
+			return (T)ThreadInterlockedExchangeAdd((int32*)&m_value, (int32)add);
+		else
+			return (T)ThreadInterlockedExchangeAdd64((int64*)&m_value, (int64)add);
+	}
+
+
+	// Same helper call as AtomicAdd, with the result discarded.
+	void operator+=(T add) {
+		if (sizeof(T) == sizeof(int32))
+			ThreadInterlockedExchangeAdd((int32*)&m_value, (int32)add);
+		else
+			ThreadInterlockedExchangeAdd64((int64*)&m_value, (int64)add);
+	}
+
+	// Subtraction is implemented as adding the negated operand.
+	void operator-=(T subtract) { operator+=(-subtract); }
+	// Multiply via a retry loop: read the value, compute the product, and try to
+	// install it with AssignIf; repeat for as long as AssignIf returns false.
+	void operator*=(T multiplier) {
+		T original, result;
+		do
+		{
+			original = m_value;
+			result = original * multiplier;
+		} while (!AssignIf(original, result));
+	}
+	// Divide using the same read / compute / AssignIf retry loop as operator*=.
+	void operator/=(T divisor) {
+		T original, result;
+		do
+		{
+			original = m_value;
+			result = original / divisor;
+		} while (!AssignIf(original, result));
+	}
+
+	// Non-modifying arithmetic on a plain read of the current value.
+	T operator+(T rhs) const { return m_value + rhs; }
+	T operator-(T rhs) const { return m_value - rhs; }
+
+	// Stores newValue and returns the result of the interlocked exchange helper.
+	T InterlockedExchange(T newValue) {
+		if (sizeof(T) == sizeof(int32))
+			return (T)ThreadInterlockedExchange((int32*)&m_value, newValue);
+		else
+			return (T)ThreadInterlockedExchange64((int64*)&m_value, newValue);
+	}
+
+private:
+	// The wrapped value; all modifications above go through ThreadInterlocked* helpers.
+	volatile T m_value;
+};
+
+typedef CInterlockedIntT<int> CInterlockedInt;
+typedef CInterlockedIntT<unsigned> CInterlockedUInt;
+
 //=============================================================================
 class CThreadFastMutex;

--- a/r5dev/vproj/clientsdk.vcxproj
+++ b/r5dev/vproj/clientsdk.vcxproj
@ -11,6 +11,7 @@
    </ProjectConfiguration>
  </ItemGroup>
  <ItemGroup>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp" />
    <ClCompile Include="..\bsplib\bsplib.cpp" />
    <ClCompile Include="..\client\cdll_engine_int.cpp" />
    <ClCompile Include="..\client\vengineclient_impl.cpp" />
@ -70,6 +71,8 @@
    <ClCompile Include="..\mathlib\sha1.cpp" />
    <ClCompile Include="..\mathlib\sha256.cpp" />
    <ClCompile Include="..\mathlib\sseconst.cpp" />
+    <ClCompile Include="..\mathlib\ssenoise.cpp" />
+    <ClCompile Include="..\mathlib\transform.cpp" />
    <ClCompile Include="..\mathlib\vmatrix.cpp" />
    <ClCompile Include="..\networksystem\pylon.cpp" />
    <ClCompile Include="..\protoc\cl_rcon.pb.cc">
@ -102,6 +105,7 @@
    <ClCompile Include="..\tier0\fasttimer.cpp" />
    <ClCompile Include="..\tier0\jobthread.cpp" />
    <ClCompile Include="..\tier0\platform.cpp" />
+    <ClCompile Include="..\tier0\threadtools.cpp" />
    <ClCompile Include="..\tier0\tslist.cpp" />
    <ClCompile Include="..\tier1\bitbuf.cpp" />
    <ClCompile Include="..\tier1\cmd.cpp" />
@ -201,6 +205,7 @@
    <ClInclude Include="..\mathlib\bits.h" />
    <ClInclude Include="..\mathlib\color.h" />
    <ClInclude Include="..\mathlib\crc32.h" />
+    <ClInclude Include="..\mathlib\fltx4.h" />
    <ClInclude Include="..\mathlib\halton.h" />
    <ClInclude Include="..\mathlib\IceKey.H" />
    <ClInclude Include="..\mathlib\mathlib.h" />
@ -210,9 +215,9 @@
    <ClInclude Include="..\mathlib\sha1.h" />
    <ClInclude Include="..\mathlib\sha256.h" />
    <ClInclude Include="..\mathlib\ssemath.h" />
-    <ClInclude Include="..\mathlib\ssenoise.h" />
    <ClInclude Include="..\mathlib\ssequaternion.h" />
    <ClInclude Include="..\mathlib\swap.h" />
+    <ClInclude Include="..\mathlib\transform.h" />
    <ClInclude Include="..\mathlib\vector.h" />
    <ClInclude Include="..\mathlib\vector2d.h" />
    <ClInclude Include="..\mathlib\vector4d.h" />
--- a/r5dev/vproj/clientsdk.vcxproj.filters
+++ b/r5dev/vproj/clientsdk.vcxproj.filters
@ -211,6 +211,9 @@
    <Filter Include="sdk\engine\client">
      <UniqueIdentifier>{01d3645a-16c3-4910-ac95-049e112cd2b8}</UniqueIdentifier>
    </Filter>
+    <Filter Include="sdk\bonesetup">
+      <UniqueIdentifier>{57e1f0c7-ce4f-4576-960e-0cd15b2b5092}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\client\cdll_engine_int.cpp">
@ -546,6 +549,18 @@
    <ClCompile Include="..\tier2\meshutils.cpp">
      <Filter>sdk\tier2</Filter>
    </ClCompile>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp">
+      <Filter>sdk\bonesetup</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\ssenoise.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\transform.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\tier0\threadtools.cpp">
+      <Filter>sdk\tier0</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\client\cdll_engine_int.h">
@ -1565,9 +1580,6 @@
    <ClInclude Include="..\mathlib\ssemath.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
-    <ClInclude Include="..\mathlib\ssenoise.h">
-      <Filter>sdk\mathlib</Filter>
-    </ClInclude>
    <ClInclude Include="..\mathlib\ssequaternion.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
@ -1598,6 +1610,12 @@
    <ClInclude Include="..\tier2\meshutils.h">
      <Filter>sdk\tier2</Filter>
    </ClInclude>
+    <ClInclude Include="..\mathlib\fltx4.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
+    <ClInclude Include="..\mathlib\transform.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <Image Include="..\shared\resource\lockedserver.png">
--- a/r5dev/vproj/dedicated.vcxproj
+++ b/r5dev/vproj/dedicated.vcxproj
@ -188,6 +188,7 @@
    <ClInclude Include="..\mathlib\bits.h" />
    <ClInclude Include="..\mathlib\color.h" />
    <ClInclude Include="..\mathlib\crc32.h" />
+    <ClInclude Include="..\mathlib\fltx4.h" />
    <ClInclude Include="..\mathlib\halton.h" />
    <ClInclude Include="..\mathlib\IceKey.H" />
    <ClInclude Include="..\mathlib\mathlib.h" />
@ -197,9 +198,9 @@
    <ClInclude Include="..\mathlib\sha1.h" />
    <ClInclude Include="..\mathlib\sha256.h" />
    <ClInclude Include="..\mathlib\ssemath.h" />
-    <ClInclude Include="..\mathlib\ssenoise.h" />
    <ClInclude Include="..\mathlib\ssequaternion.h" />
    <ClInclude Include="..\mathlib\swap.h" />
+    <ClInclude Include="..\mathlib\transform.h" />
    <ClInclude Include="..\mathlib\vector.h" />
    <ClInclude Include="..\mathlib\vector2d.h" />
    <ClInclude Include="..\mathlib\vector4d.h" />
@ -448,6 +449,7 @@
    <ClInclude Include="..\windows\system.h" />
  </ItemGroup>
  <ItemGroup>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp" />
    <ClCompile Include="..\bsplib\bsplib.cpp" />
    <ClCompile Include="..\client\vengineclient_impl.cpp" />
    <ClCompile Include="..\common\opcodes.cpp" />
@ -501,6 +503,8 @@
    <ClCompile Include="..\mathlib\sha1.cpp" />
    <ClCompile Include="..\mathlib\sha256.cpp" />
    <ClCompile Include="..\mathlib\sseconst.cpp" />
+    <ClCompile Include="..\mathlib\ssenoise.cpp" />
+    <ClCompile Include="..\mathlib\transform.cpp" />
    <ClCompile Include="..\mathlib\vmatrix.cpp" />
    <ClCompile Include="..\networksystem\pylon.cpp" />
    <ClCompile Include="..\protoc\cl_rcon.pb.cc">
@ -532,6 +536,7 @@
    <ClCompile Include="..\tier0\fasttimer.cpp" />
    <ClCompile Include="..\tier0\jobthread.cpp" />
    <ClCompile Include="..\tier0\platform.cpp" />
+    <ClCompile Include="..\tier0\threadtools.cpp" />
    <ClCompile Include="..\tier0\tslist.cpp" />
    <ClCompile Include="..\tier1\bitbuf.cpp" />
    <ClCompile Include="..\tier1\cmd.cpp" />
--- a/r5dev/vproj/dedicated.vcxproj.filters
+++ b/r5dev/vproj/dedicated.vcxproj.filters
@ -187,6 +187,9 @@
    <Filter Include="sdk\engine\client">
      <UniqueIdentifier>{98975892-5379-4f6c-8c7e-35d92d2bc5e5}</UniqueIdentifier>
    </Filter>
+    <Filter Include="sdk\bonesetup">
+      <UniqueIdentifier>{d49ec580-58c2-49e7-8e83-957da576febd}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\common\opcodes.h">
@ -1131,9 +1134,6 @@
    <ClInclude Include="..\mathlib\ssemath.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
-    <ClInclude Include="..\mathlib\ssenoise.h">
-      <Filter>sdk\mathlib</Filter>
-    </ClInclude>
    <ClInclude Include="..\mathlib\ssequaternion.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
@ -1161,6 +1161,12 @@
    <ClInclude Include="..\vstdlib\random.h">
      <Filter>sdk\vstdlib</Filter>
    </ClInclude>
+    <ClInclude Include="..\mathlib\fltx4.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
+    <ClInclude Include="..\mathlib\transform.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\common\opcodes.cpp">
@ -1448,6 +1454,18 @@
    <ClCompile Include="..\vstdlib\random.cpp">
      <Filter>sdk\vstdlib</Filter>
    </ClCompile>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp">
+      <Filter>sdk\bonesetup</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\ssenoise.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\transform.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\tier0\threadtools.cpp">
+      <Filter>sdk\tier0</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <None Include="..\Dedicated.def" />
--- a/r5dev/vproj/gamesdk.vcxproj
+++ b/r5dev/vproj/gamesdk.vcxproj
@ -11,6 +11,7 @@
    </ProjectConfiguration>
  </ItemGroup>
  <ItemGroup>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp" />
    <ClCompile Include="..\bsplib\bsplib.cpp" />
    <ClCompile Include="..\client\cdll_engine_int.cpp" />
    <ClCompile Include="..\client\vengineclient_impl.cpp" />
@ -76,6 +77,8 @@
    <ClCompile Include="..\mathlib\sha1.cpp" />
    <ClCompile Include="..\mathlib\sha256.cpp" />
    <ClCompile Include="..\mathlib\sseconst.cpp" />
+    <ClCompile Include="..\mathlib\ssenoise.cpp" />
+    <ClCompile Include="..\mathlib\transform.cpp" />
    <ClCompile Include="..\mathlib\vmatrix.cpp" />
    <ClCompile Include="..\networksystem\pylon.cpp" />
    <ClCompile Include="..\protoc\cl_rcon.pb.cc">
@ -109,6 +112,7 @@
    <ClCompile Include="..\tier0\fasttimer.cpp" />
    <ClCompile Include="..\tier0\jobthread.cpp" />
    <ClCompile Include="..\tier0\platform.cpp" />
+    <ClCompile Include="..\tier0\threadtools.cpp" />
    <ClCompile Include="..\tier0\tslist.cpp" />
    <ClCompile Include="..\tier1\bitbuf.cpp" />
    <ClCompile Include="..\tier1\cmd.cpp" />
@ -219,6 +223,8 @@
    <ClInclude Include="..\mathlib\bits.h" />
    <ClInclude Include="..\mathlib\color.h" />
    <ClInclude Include="..\mathlib\crc32.h" />
+    <ClInclude Include="..\mathlib\float_tools.h" />
+    <ClInclude Include="..\mathlib\fltx4.h" />
    <ClInclude Include="..\mathlib\halton.h" />
    <ClInclude Include="..\mathlib\IceKey.H" />
    <ClInclude Include="..\mathlib\mathlib.h" />
@ -228,9 +234,9 @@
    <ClInclude Include="..\mathlib\sha1.h" />
    <ClInclude Include="..\mathlib\sha256.h" />
    <ClInclude Include="..\mathlib\ssemath.h" />
-    <ClInclude Include="..\mathlib\ssenoise.h" />
    <ClInclude Include="..\mathlib\ssequaternion.h" />
    <ClInclude Include="..\mathlib\swap.h" />
+    <ClInclude Include="..\mathlib\transform.h" />
    <ClInclude Include="..\mathlib\vector.h" />
    <ClInclude Include="..\mathlib\vector2d.h" />
    <ClInclude Include="..\mathlib\vector4d.h" />
--- a/r5dev/vproj/gamesdk.vcxproj.filters
+++ b/r5dev/vproj/gamesdk.vcxproj.filters
@ -220,6 +220,9 @@
    <Filter Include="sdk\engine\client">
      <UniqueIdentifier>{b7e33427-fd37-44b1-8530-651ae5f4fde1}</UniqueIdentifier>
    </Filter>
+    <Filter Include="sdk\bonesetup">
+      <UniqueIdentifier>{acbd4b45-6a8d-4d9f-9747-1bc460481bb4}</UniqueIdentifier>
+    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\client\vengineclient_impl.cpp">
@ -576,6 +579,18 @@
    <ClCompile Include="..\tier2\meshutils.cpp">
      <Filter>sdk\tier2</Filter>
    </ClCompile>
+    <ClCompile Include="..\bonesetup\bone_utils.cpp">
+      <Filter>sdk\bonesetup</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\transform.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\mathlib\ssenoise.cpp">
+      <Filter>sdk\mathlib</Filter>
+    </ClCompile>
+    <ClCompile Include="..\tier0\threadtools.cpp">
+      <Filter>sdk\tier0</Filter>
+    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\client\cdll_engine_int.h">
@ -1637,9 +1652,6 @@
    <ClInclude Include="..\mathlib\ssemath.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
-    <ClInclude Include="..\mathlib\ssenoise.h">
-      <Filter>sdk\mathlib</Filter>
-    </ClInclude>
    <ClInclude Include="..\mathlib\noisedata.h">
      <Filter>sdk\mathlib</Filter>
    </ClInclude>
@ -1667,6 +1679,15 @@
    <ClInclude Include="..\tier2\meshutils.h">
      <Filter>sdk\tier2</Filter>
    </ClInclude>
+    <ClInclude Include="..\mathlib\float_tools.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
+    <ClInclude Include="..\mathlib\fltx4.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
+    <ClInclude Include="..\mathlib\transform.h">
+      <Filter>sdk\mathlib</Filter>
+    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <Image Include="..\shared\resource\lockedserver.png">
--- a/r5dev/vstdlib/random.h
+++ b/r5dev/vstdlib/random.h
@ -10,7 +10,6 @@
 #define VSTDLIB_RANDOM_H

 #include "tier0/basetypes.h"
-#include "tier0/threadtools.h"

 #define NTAB 32