2022-07-08 00:55:01 +02:00
|
|
|
|
//===== Copyright <20> 1996-2005, Valve Corporation, All rights reserved. ======//
|
2022-07-06 21:11:32 +02:00
|
|
|
|
//
|
|
|
|
|
// Purpose: - defines SIMD "structure of arrays" classes and functions.
|
|
|
|
|
//
|
|
|
|
|
//===========================================================================//
|
|
|
|
|
#ifndef SSEQUATMATH_H
|
|
|
|
|
#define SSEQUATMATH_H
|
|
|
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|
#pragma once
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "mathlib/ssemath.h"
|
|
|
|
|
|
|
|
|
|
// Use this #define to allow SSE versions of Quaternion math
|
|
|
|
|
// to exist on PC.
|
|
|
|
|
// On PC, certain horizontal vector operations are not supported.
|
|
|
|
|
// This causes the SSE implementation of quaternion math to mix the
|
|
|
|
|
// vector and scalar floating point units, which is extremely
|
|
|
|
|
// performance negative if you don't compile to native SSE2 (which
|
|
|
|
|
// we don't as of Sept 1, 2007). So, it's best not to allow these
|
|
|
|
|
// functions to exist at all. It's not good enough to simply replace
|
|
|
|
|
// the contents of the functions with scalar math, because each call
|
2022-09-09 19:47:31 +02:00
|
|
|
|
// to LoadAligned and StoreAligned will result in an unnecessary copy
|
2022-07-06 21:11:32 +02:00
|
|
|
|
// of the quaternion, and several moves to and from the XMM registers.
|
|
|
|
|
//
|
|
|
|
|
// Basically, the problem you run into is that for efficient SIMD code,
|
|
|
|
|
// you need to load the quaternions and vectors into SIMD registers and
|
|
|
|
|
// keep them there as long as possible while doing only SIMD math,
|
|
|
|
|
// whereas for efficient scalar code, each time you copy onto or ever
|
|
|
|
|
// use a fltx4, it hoses your pipeline. So the difference has to be
|
|
|
|
|
// in the management of temporary variables in the calling function,
|
|
|
|
|
// not inside the math functions.
|
|
|
|
|
//
|
|
|
|
|
// If you compile assuming the presence of SSE2, the MSVC will abandon
|
|
|
|
|
// the traditional x87 FPU operations altogether and make everything use
|
|
|
|
|
// the SSE2 registers, which lessens this problem a little.
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// permitted only on 360, as we've done careful tuning on its Altivec math.
|
|
|
|
|
// FourQuaternions, however, are always allowed, because vertical ops are
|
|
|
|
|
// fine on SSE.
|
|
|
|
|
#ifdef PLATFORM_PPC
|
2022-07-06 21:11:32 +02:00
|
|
|
|
#define ALLOW_SIMD_QUATERNION_MATH 1 // not on PC!
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Load/store quaternions
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
#ifndef _X360
|
|
|
|
|
// Using STDC or SSE
|
|
|
|
|
FORCEINLINE fltx4 LoadAlignedSIMD(const QuaternionAligned& pSIMD)
|
|
|
|
|
{
|
|
|
|
|
fltx4 retval = LoadAlignedSIMD(pSIMD.Base());
|
|
|
|
|
return retval;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE fltx4 LoadAlignedSIMD(const QuaternionAligned* RESTRICT pSIMD)
|
|
|
|
|
{
|
2022-07-08 00:55:01 +02:00
|
|
|
|
fltx4 retval = LoadAlignedSIMD(pSIMD->Base());
|
2022-07-06 21:11:32 +02:00
|
|
|
|
return retval;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE void StoreAlignedSIMD(QuaternionAligned* RESTRICT pSIMD, const fltx4& a)
|
|
|
|
|
{
|
|
|
|
|
StoreAlignedSIMD(pSIMD->Base(), a);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// for the transitional class -- load a QuaternionAligned
|
|
|
|
|
FORCEINLINE fltx4 LoadAlignedSIMD(const QuaternionAligned& pSIMD)
|
|
|
|
|
{
|
|
|
|
|
fltx4 retval = XMLoadVector4A(pSIMD.Base());
|
|
|
|
|
return retval;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE fltx4 LoadAlignedSIMD(const QuaternionAligned* RESTRICT pSIMD)
|
|
|
|
|
{
|
|
|
|
|
fltx4 retval = XMLoadVector4A(pSIMD);
|
|
|
|
|
return retval;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE void StoreAlignedSIMD(QuaternionAligned* RESTRICT pSIMD, const fltx4& a)
|
|
|
|
|
{
|
|
|
|
|
XMStoreVector4A(pSIMD->Base(), a);
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// From a RadianEuler packed onto a fltx4, to a quaternion
|
|
|
|
|
fltx4 AngleQuaternionSIMD(FLTX4 vAngles);
|
|
|
|
|
|
2022-07-06 21:11:32 +02:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if ALLOW_SIMD_QUATERNION_MATH
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Make sure quaternions are within 180 degrees of one another, if not, reverse q
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
FORCEINLINE fltx4 QuaternionAlignSIMD(const fltx4& p, const fltx4& q)
|
|
|
|
|
{
|
|
|
|
|
// decide if one of the quaternions is backwards
|
|
|
|
|
fltx4 a = SubSIMD(p, q);
|
|
|
|
|
fltx4 b = AddSIMD(p, q);
|
|
|
|
|
a = Dot4SIMD(a, a);
|
|
|
|
|
b = Dot4SIMD(b, b);
|
2022-07-08 00:55:01 +02:00
|
|
|
|
fltx4 cmp = (fltx4)CmpGtSIMD(a, b);
|
2022-07-06 21:11:32 +02:00
|
|
|
|
fltx4 result = MaskedAssign(cmp, NegSIMD(q), q);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Normalize Quaternion
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
#if USE_STDC_FOR_SIMD
|
|
|
|
|
|
|
|
|
|
FORCEINLINE fltx4 QuaternionNormalizeSIMD(const fltx4& q)
|
|
|
|
|
{
|
|
|
|
|
fltx4 radius, result;
|
|
|
|
|
radius = Dot4SIMD(q, q);
|
|
|
|
|
|
|
|
|
|
if (SubFloat(radius, 0)) // > FLT_EPSILON && ((radius < 1.0f - 4*FLT_EPSILON) || (radius > 1.0f + 4*FLT_EPSILON))
|
|
|
|
|
{
|
|
|
|
|
float iradius = 1.0f / sqrt(SubFloat(radius, 0));
|
|
|
|
|
result = ReplicateX4(iradius);
|
|
|
|
|
result = MulSIMD(result, q);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
return q;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// SSE + X360 implementation
|
|
|
|
|
FORCEINLINE fltx4 QuaternionNormalizeSIMD(const fltx4& q)
|
|
|
|
|
{
|
|
|
|
|
fltx4 radius, result, mask;
|
|
|
|
|
radius = Dot4SIMD(q, q);
|
2022-07-08 00:55:01 +02:00
|
|
|
|
mask = (fltx4)CmpEqSIMD(radius, Four_Zeros); // all ones iff radius = 0
|
2022-07-06 21:11:32 +02:00
|
|
|
|
result = ReciprocalSqrtSIMD(radius);
|
|
|
|
|
result = MulSIMD(result, q);
|
|
|
|
|
return MaskedAssign(mask, q, result); // if radius was 0, just return q
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// 0.0 returns p, 1.0 return q.
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
FORCEINLINE fltx4 QuaternionBlendNoAlignSIMD(const fltx4& p, const fltx4& q, float t)
|
|
|
|
|
{
|
|
|
|
|
fltx4 sclp, sclq, result;
|
|
|
|
|
sclq = ReplicateX4(t);
|
|
|
|
|
sclp = SubSIMD(Four_Ones, sclq);
|
|
|
|
|
result = MulSIMD(sclp, p);
|
|
|
|
|
result = MaddSIMD(sclq, q, result);
|
|
|
|
|
return QuaternionNormalizeSIMD(result);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Blend Quaternions
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
FORCEINLINE fltx4 QuaternionBlendSIMD(const fltx4& p, const fltx4& q, float t)
|
|
|
|
|
{
|
|
|
|
|
// decide if one of the quaternions is backwards
|
|
|
|
|
fltx4 q2, result;
|
|
|
|
|
q2 = QuaternionAlignSIMD(p, q);
|
|
|
|
|
result = QuaternionBlendNoAlignSIMD(p, q2, t);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Multiply Quaternions
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
#ifndef _X360
|
|
|
|
|
|
|
|
|
|
// SSE and STDC
|
|
|
|
|
FORCEINLINE fltx4 QuaternionMultSIMD(const fltx4& p, const fltx4& q)
|
|
|
|
|
{
|
|
|
|
|
// decide if one of the quaternions is backwards
|
|
|
|
|
fltx4 q2, result;
|
|
|
|
|
q2 = QuaternionAlignSIMD(p, q);
|
|
|
|
|
SubFloat(result, 0) = SubFloat(p, 0) * SubFloat(q2, 3) + SubFloat(p, 1) * SubFloat(q2, 2) - SubFloat(p, 2) * SubFloat(q2, 1) + SubFloat(p, 3) * SubFloat(q2, 0);
|
|
|
|
|
SubFloat(result, 1) = -SubFloat(p, 0) * SubFloat(q2, 2) + SubFloat(p, 1) * SubFloat(q2, 3) + SubFloat(p, 2) * SubFloat(q2, 0) + SubFloat(p, 3) * SubFloat(q2, 1);
|
|
|
|
|
SubFloat(result, 2) = SubFloat(p, 0) * SubFloat(q2, 1) - SubFloat(p, 1) * SubFloat(q2, 0) + SubFloat(p, 2) * SubFloat(q2, 3) + SubFloat(p, 3) * SubFloat(q2, 2);
|
|
|
|
|
SubFloat(result, 3) = -SubFloat(p, 0) * SubFloat(q2, 0) - SubFloat(p, 1) * SubFloat(q2, 1) - SubFloat(p, 2) * SubFloat(q2, 2) + SubFloat(p, 3) * SubFloat(q2, 3);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// X360
|
|
|
|
|
extern const fltx4 g_QuatMultRowSign[4];
|
|
|
|
|
FORCEINLINE fltx4 QuaternionMultSIMD(const fltx4& p, const fltx4& q)
|
|
|
|
|
{
|
|
|
|
|
fltx4 q2, row, result;
|
|
|
|
|
q2 = QuaternionAlignSIMD(p, q);
|
|
|
|
|
|
|
|
|
|
row = XMVectorSwizzle(q2, 3, 2, 1, 0);
|
|
|
|
|
row = MulSIMD(row, g_QuatMultRowSign[0]);
|
|
|
|
|
result = Dot4SIMD(row, p);
|
|
|
|
|
|
|
|
|
|
row = XMVectorSwizzle(q2, 2, 3, 0, 1);
|
|
|
|
|
row = MulSIMD(row, g_QuatMultRowSign[1]);
|
|
|
|
|
row = Dot4SIMD(row, p);
|
|
|
|
|
result = __vrlimi(result, row, 4, 0);
|
|
|
|
|
|
|
|
|
|
row = XMVectorSwizzle(q2, 1, 0, 3, 2);
|
|
|
|
|
row = MulSIMD(row, g_QuatMultRowSign[2]);
|
|
|
|
|
row = Dot4SIMD(row, p);
|
|
|
|
|
result = __vrlimi(result, row, 2, 0);
|
|
|
|
|
|
|
|
|
|
row = MulSIMD(q2, g_QuatMultRowSign[3]);
|
|
|
|
|
row = Dot4SIMD(row, p);
|
|
|
|
|
result = __vrlimi(result, row, 1, 0);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------
|
|
|
|
|
// Quaternion scale
|
|
|
|
|
//---------------------------------------------------------------------
|
2022-07-08 00:55:01 +02:00
|
|
|
|
#ifdef _X360
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// X360
|
2022-07-06 21:11:32 +02:00
|
|
|
|
FORCEINLINE fltx4 QuaternionScaleSIMD(const fltx4& p, float t)
|
|
|
|
|
{
|
2022-07-08 00:55:01 +02:00
|
|
|
|
fltx4 sinom = Dot3SIMD(p, p);
|
|
|
|
|
sinom = SqrtSIMD(sinom);
|
|
|
|
|
sinom = MinSIMD(sinom, Four_Ones);
|
|
|
|
|
fltx4 sinsom = ArcSinSIMD(sinom);
|
|
|
|
|
fltx4 t4 = ReplicateX4(t);
|
|
|
|
|
sinsom = MulSIMD(sinsom, t4);
|
|
|
|
|
sinsom = SinSIMD(sinsom);
|
|
|
|
|
sinom = AddSIMD(sinom, Four_Epsilons);
|
|
|
|
|
sinom = ReciprocalSIMD(sinom);
|
|
|
|
|
t4 = MulSIMD(sinsom, sinom);
|
|
|
|
|
fltx4 result = MulSIMD(p, t4);
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// rescale rotation
|
|
|
|
|
sinsom = MulSIMD(sinsom, sinsom);
|
|
|
|
|
fltx4 r = SubSIMD(Four_Ones, sinsom);
|
|
|
|
|
r = MaxSIMD(r, Four_Zeros);
|
|
|
|
|
r = SqrtSIMD(r);
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// keep sign of rotation
|
|
|
|
|
fltx4 cmp = CmpGeSIMD(p, Four_Zeros);
|
|
|
|
|
r = MaskedAssign(cmp, r, NegSIMD(r));
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
result = __vrlimi(result, r, 1, 0);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// X360
|
|
|
|
|
// assumes t4 contains a float replicated to each slot
|
|
|
|
|
FORCEINLINE fltx4 QuaternionScaleSIMD(const fltx4& p, const fltx4& t4)
|
|
|
|
|
{
|
|
|
|
|
fltx4 sinom = Dot3SIMD(p, p);
|
|
|
|
|
sinom = SqrtSIMD(sinom);
|
|
|
|
|
sinom = MinSIMD(sinom, Four_Ones);
|
|
|
|
|
fltx4 sinsom = ArcSinSIMD(sinom);
|
|
|
|
|
sinsom = MulSIMD(sinsom, t4);
|
|
|
|
|
sinsom = SinSIMD(sinsom);
|
|
|
|
|
sinom = AddSIMD(sinom, Four_Epsilons);
|
|
|
|
|
sinom = ReciprocalSIMD(sinom);
|
|
|
|
|
fltx4 result = MulSIMD(p, MulSIMD(sinsom, sinom));
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// rescale rotation
|
|
|
|
|
sinsom = MulSIMD(sinsom, sinsom);
|
|
|
|
|
fltx4 r = SubSIMD(Four_Ones, sinsom);
|
|
|
|
|
r = MaxSIMD(r, Four_Zeros);
|
|
|
|
|
r = SqrtSIMD(r);
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
|
|
|
|
// keep sign of rotation
|
2022-07-08 00:55:01 +02:00
|
|
|
|
fltx4 cmp = CmpGeSIMD(p, Four_Zeros);
|
|
|
|
|
r = MaskedAssign(cmp, r, NegSIMD(r));
|
|
|
|
|
|
|
|
|
|
result = __vrlimi(result, r, 1, 0);
|
|
|
|
|
return result;
|
2022-07-06 21:11:32 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
#elif defined(_PS3)
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
|
|
|
|
// X360
|
|
|
|
|
FORCEINLINE fltx4 QuaternionScaleSIMD(const fltx4& p, float t)
|
|
|
|
|
{
|
|
|
|
|
fltx4 sinom = Dot3SIMD(p, p);
|
|
|
|
|
sinom = SqrtSIMD(sinom);
|
|
|
|
|
sinom = MinSIMD(sinom, Four_Ones);
|
|
|
|
|
fltx4 sinsom = ArcSinSIMD(sinom);
|
|
|
|
|
fltx4 t4 = ReplicateX4(t);
|
|
|
|
|
sinsom = MulSIMD(sinsom, t4);
|
|
|
|
|
sinsom = SinSIMD(sinsom);
|
|
|
|
|
sinom = AddSIMD(sinom, Four_Epsilons);
|
|
|
|
|
sinom = ReciprocalSIMD(sinom);
|
|
|
|
|
t4 = MulSIMD(sinsom, sinom);
|
|
|
|
|
fltx4 result = MulSIMD(p, t4);
|
|
|
|
|
|
|
|
|
|
// rescale rotation
|
|
|
|
|
sinsom = MulSIMD(sinsom, sinsom);
|
|
|
|
|
fltx4 r = SubSIMD(Four_Ones, sinsom);
|
|
|
|
|
r = MaxSIMD(r, Four_Zeros);
|
|
|
|
|
r = SqrtSIMD(r);
|
|
|
|
|
|
|
|
|
|
// keep sign of rotation
|
2022-07-08 00:55:01 +02:00
|
|
|
|
r = MaskedAssign(CmpGeSIMD(p, Four_Zeros), r, NegSIMD(r));
|
|
|
|
|
// set just the w component of result
|
|
|
|
|
result = MaskedAssign(LoadAlignedSIMD(g_SIMD_ComponentMask[3]), r, result);
|
2022-07-06 21:11:32 +02:00
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
// X360
|
|
|
|
|
// assumes t4 contains a float replicated to each slot
|
|
|
|
|
FORCEINLINE fltx4 QuaternionScaleSIMD(const fltx4& p, const fltx4& t4)
|
|
|
|
|
{
|
|
|
|
|
fltx4 sinom = Dot3SIMD(p, p);
|
|
|
|
|
sinom = SqrtSIMD(sinom);
|
|
|
|
|
sinom = MinSIMD(sinom, Four_Ones);
|
|
|
|
|
fltx4 sinsom = ArcSinSIMD(sinom);
|
|
|
|
|
sinsom = MulSIMD(sinsom, t4);
|
|
|
|
|
sinsom = SinSIMD(sinsom);
|
|
|
|
|
sinom = AddSIMD(sinom, Four_Epsilons);
|
|
|
|
|
sinom = ReciprocalSIMD(sinom);
|
|
|
|
|
fltx4 result = MulSIMD(p, MulSIMD(sinsom, sinom));
|
|
|
|
|
|
|
|
|
|
// rescale rotation
|
|
|
|
|
sinsom = MulSIMD(sinsom, sinsom);
|
|
|
|
|
fltx4 r = SubSIMD(Four_Ones, sinsom);
|
|
|
|
|
r = MaxSIMD(r, Four_Zeros);
|
|
|
|
|
r = SqrtSIMD(r);
|
|
|
|
|
|
|
|
|
|
// keep sign of rotation
|
|
|
|
|
r = MaskedAssign(CmpGeSIMD(p, Four_Zeros), r, NegSIMD(r));
|
|
|
|
|
// set just the w component of result
|
|
|
|
|
result = MaskedAssign(LoadAlignedSIMD(g_SIMD_ComponentMask[3]), r, result);
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// SSE and STDC
|
|
|
|
|
FORCEINLINE fltx4 QuaternionScaleSIMD(const fltx4& p, float t)
|
|
|
|
|
{
|
|
|
|
|
float r;
|
|
|
|
|
fltx4 q;
|
|
|
|
|
|
|
|
|
|
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
|
|
|
|
|
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
|
|
|
|
|
float sinom = sqrt(SubFloat(p, 0) * SubFloat(p, 0) + SubFloat(p, 1) * SubFloat(p, 1) + SubFloat(p, 2) * SubFloat(p, 2));
|
|
|
|
|
sinom = fmin(sinom, 1.f);
|
|
|
|
|
|
|
|
|
|
float sinsom = sin(asin(sinom) * t);
|
|
|
|
|
|
|
|
|
|
t = sinsom / (sinom + FLT_EPSILON);
|
|
|
|
|
SubFloat(q, 0) = t * SubFloat(p, 0);
|
|
|
|
|
SubFloat(q, 1) = t * SubFloat(p, 1);
|
|
|
|
|
SubFloat(q, 2) = t * SubFloat(p, 2);
|
|
|
|
|
|
|
|
|
|
// rescale rotation
|
|
|
|
|
r = 1.0f - sinsom * sinsom;
|
|
|
|
|
|
|
|
|
|
// Assert( r >= 0 );
|
|
|
|
|
if (r < 0.0f)
|
|
|
|
|
r = 0.0f;
|
|
|
|
|
r = sqrt(r);
|
|
|
|
|
|
|
|
|
|
// keep sign of rotation
|
|
|
|
|
SubFloat(q, 3) = fsel(SubFloat(p, 3), r, -r);
|
|
|
|
|
return q;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-06 21:11:32 +02:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2022-09-09 19:47:31 +02:00
|
|
|
|
// Quaternion spherical linear interpolation
|
2022-07-06 21:11:32 +02:00
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
#ifndef _X360
|
|
|
|
|
|
|
|
|
|
// SSE and STDC
|
|
|
|
|
FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD(const fltx4& p, const fltx4& q, float t)
|
|
|
|
|
{
|
|
|
|
|
float omega, cosom, sinom, sclp, sclq;
|
|
|
|
|
|
|
|
|
|
fltx4 result;
|
|
|
|
|
|
|
|
|
|
// 0.0 returns p, 1.0 return q.
|
|
|
|
|
cosom = SubFloat(p, 0) * SubFloat(q, 0) + SubFloat(p, 1) * SubFloat(q, 1) +
|
|
|
|
|
SubFloat(p, 2) * SubFloat(q, 2) + SubFloat(p, 3) * SubFloat(q, 3);
|
|
|
|
|
|
|
|
|
|
if ((1.0f + cosom) > 0.000001f)
|
|
|
|
|
{
|
|
|
|
|
if ((1.0f - cosom) > 0.000001f)
|
|
|
|
|
{
|
|
|
|
|
omega = acos(cosom);
|
|
|
|
|
sinom = sin(omega);
|
|
|
|
|
sclp = sin((1.0f - t) * omega) / sinom;
|
|
|
|
|
sclq = sin(t * omega) / sinom;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
// TODO: add short circuit for cosom == 1.0f?
|
|
|
|
|
sclp = 1.0f - t;
|
|
|
|
|
sclq = t;
|
|
|
|
|
}
|
|
|
|
|
SubFloat(result, 0) = sclp * SubFloat(p, 0) + sclq * SubFloat(q, 0);
|
|
|
|
|
SubFloat(result, 1) = sclp * SubFloat(p, 1) + sclq * SubFloat(q, 1);
|
|
|
|
|
SubFloat(result, 2) = sclp * SubFloat(p, 2) + sclq * SubFloat(q, 2);
|
|
|
|
|
SubFloat(result, 3) = sclp * SubFloat(p, 3) + sclq * SubFloat(q, 3);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
SubFloat(result, 0) = -SubFloat(q, 1);
|
|
|
|
|
SubFloat(result, 1) = SubFloat(q, 0);
|
|
|
|
|
SubFloat(result, 2) = -SubFloat(q, 3);
|
|
|
|
|
SubFloat(result, 3) = SubFloat(q, 2);
|
2024-02-25 23:41:38 +01:00
|
|
|
|
sclp = (float)sin((1.0f - t) * (0.5f * M_PI));
|
|
|
|
|
sclq = (float)sin(t * (0.5f * M_PI));
|
2022-07-06 21:11:32 +02:00
|
|
|
|
SubFloat(result, 0) = sclp * SubFloat(p, 0) + sclq * SubFloat(result, 0);
|
|
|
|
|
SubFloat(result, 1) = sclp * SubFloat(p, 1) + sclq * SubFloat(result, 1);
|
|
|
|
|
SubFloat(result, 2) = sclp * SubFloat(p, 2) + sclq * SubFloat(result, 2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
// X360
|
|
|
|
|
FORCEINLINE fltx4 QuaternionSlerpNoAlignSIMD(const fltx4& p, const fltx4& q, float t)
|
|
|
|
|
{
|
|
|
|
|
return XMQuaternionSlerp(p, q, t);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE fltx4 QuaternionSlerpSIMD(const fltx4& p, const fltx4& q, float t)
|
|
|
|
|
{
|
|
|
|
|
fltx4 q2, result;
|
|
|
|
|
q2 = QuaternionAlignSIMD(p, q);
|
|
|
|
|
result = QuaternionSlerpNoAlignSIMD(p, q2, t);
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif // ALLOW_SIMD_QUATERNION_MATH
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
|
|
|
|
|
/// class FourVectors stores 4 independent vectors for use in SIMD processing. These vectors are
|
|
|
|
|
/// stored in the format x x x x y y y y z z z z so that they can be efficiently SIMD-accelerated.
|
|
|
|
|
class ALIGN16 FourQuaternions
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
fltx4 x, y, z, w;
|
|
|
|
|
|
|
|
|
|
FourQuaternions(void)
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FourQuaternions(const fltx4& _x,
|
|
|
|
|
const fltx4& _y,
|
|
|
|
|
const fltx4& _z,
|
|
|
|
|
const fltx4& _w)
|
|
|
|
|
: x(_x), y(_y), z(_z), w(_w)
|
|
|
|
|
{}
|
|
|
|
|
|
|
|
|
|
#if !defined(__SPU__)
|
|
|
|
|
// four rotations around the same axis. angles should be in radians.
|
|
|
|
|
FourQuaternions(const fltx4& axis,
|
|
|
|
|
const float& angle0, const float& angle1, const float& angle2, const float& angle3)
|
|
|
|
|
{
|
|
|
|
|
FromAxisAndAngles(axis, angle0, angle1, angle2, angle3);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
FourQuaternions(FourQuaternions const& src)
|
|
|
|
|
{
|
|
|
|
|
x = src.x;
|
|
|
|
|
y = src.y;
|
|
|
|
|
z = src.z;
|
|
|
|
|
w = src.w;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE void operator=(FourQuaternions const& src)
|
|
|
|
|
{
|
|
|
|
|
x = src.x;
|
|
|
|
|
y = src.y;
|
|
|
|
|
z = src.z;
|
|
|
|
|
w = src.w;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// this = this * q;
|
|
|
|
|
FORCEINLINE FourQuaternions Mul(FourQuaternions const& q) const;
|
|
|
|
|
|
|
|
|
|
/// negate the vector part
|
|
|
|
|
FORCEINLINE FourQuaternions Conjugate() const;
|
|
|
|
|
|
|
|
|
|
/// for a quaternion representing a rotation of angle theta, return
|
|
|
|
|
/// one of angle s*theta
|
|
|
|
|
/// scale is four floats -- one for each quat
|
|
|
|
|
FORCEINLINE FourQuaternions ScaleAngle(const fltx4& scale) const;
|
|
|
|
|
|
|
|
|
|
/// ret = this * ( s * q )
|
|
|
|
|
/// In other words, for a quaternion representing a rotation of angle theta, return
|
|
|
|
|
/// one of angle s*theta
|
|
|
|
|
/// s is four floats in a fltx4 -- one for each quaternion
|
|
|
|
|
FORCEINLINE FourQuaternions MulAc(const fltx4& s, const FourQuaternions& q) const;
|
|
|
|
|
|
|
|
|
|
/// ret = ( s * this ) * q
|
|
|
|
|
FORCEINLINE FourQuaternions ScaleMul(const fltx4& s, const FourQuaternions& q) const;
|
|
|
|
|
|
|
|
|
|
/// Slerp four quaternions at once, FROM me TO the specified out.
|
|
|
|
|
FORCEINLINE FourQuaternions Slerp(const FourQuaternions& to, const fltx4& t);
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions SlerpNoAlign(const FourQuaternions& originalto, const fltx4& t);
|
|
|
|
|
|
|
|
|
|
#if !defined(__SPU__)
|
|
|
|
|
/// given an axis and four angles, populate this quaternion with the equivalent rotations
|
|
|
|
|
/// (ie, make these four quaternions represent four different rotations around the same axis)
|
|
|
|
|
/// angles should be in RADIANS
|
|
|
|
|
FORCEINLINE FourQuaternions& FromAxisAndAngles(const fltx4& axis,
|
|
|
|
|
const float& angle0, const float& angle1, const float& angle2, const float& angle3);
|
|
|
|
|
FORCEINLINE FourQuaternions& FromAxisAndAngles(const fltx4& axis, const fltx4& angles);
|
|
|
|
|
// one convenience imp if you're doing this in degrees
|
|
|
|
|
FORCEINLINE FourQuaternions& FromAxisAndAnglesInDegrees(const fltx4& axis, const fltx4& angles)
|
|
|
|
|
{
|
|
|
|
|
return FromAxisAndAngles(axis, MulSIMD(angles, Four_DegToRad));
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// rotate (in place) a FourVectors by this quaternion. there's a corresponding RotateBy in FourVectors.
|
2023-04-01 22:11:41 +02:00
|
|
|
|
FORCEINLINE void RotateFourVectors(FourVectors* /*RESTRICT*/ vecs) const /*RESTRICT*/;
|
2022-07-08 00:55:01 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// LoadAndSwizzleAligned - load 4 QuaternionAligneds into a FourQuaternions, performing transpose op.
|
|
|
|
|
/// all 4 vectors must be 128 bit boundary
|
|
|
|
|
FORCEINLINE void LoadAndSwizzleAligned(const float* RESTRICT a, const float* RESTRICT b, const float* RESTRICT c, const float* RESTRICT d)
|
|
|
|
|
{
|
|
|
|
|
#if defined( _X360 )
|
|
|
|
|
fltx4 tx = LoadAlignedSIMD(a);
|
|
|
|
|
fltx4 ty = LoadAlignedSIMD(b);
|
|
|
|
|
fltx4 tz = LoadAlignedSIMD(c);
|
|
|
|
|
fltx4 tw = LoadAlignedSIMD(d);
|
|
|
|
|
fltx4 r0 = __vmrghw(tx, tz);
|
|
|
|
|
fltx4 r1 = __vmrghw(ty, tw);
|
|
|
|
|
fltx4 r2 = __vmrglw(tx, tz);
|
|
|
|
|
fltx4 r3 = __vmrglw(ty, tw);
|
|
|
|
|
|
|
|
|
|
x = __vmrghw(r0, r1);
|
|
|
|
|
y = __vmrglw(r0, r1);
|
|
|
|
|
z = __vmrghw(r2, r3);
|
|
|
|
|
w = __vmrglw(r2, r3);
|
|
|
|
|
#else
|
|
|
|
|
x = LoadAlignedSIMD(a);
|
|
|
|
|
y = LoadAlignedSIMD(b);
|
|
|
|
|
z = LoadAlignedSIMD(c);
|
|
|
|
|
w = LoadAlignedSIMD(d);
|
|
|
|
|
// now, matrix is:
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
TransposeSIMD(x, y, z, w);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE void LoadAndSwizzleAligned(const QuaternionAligned* RESTRICT a,
|
|
|
|
|
const QuaternionAligned* RESTRICT b,
|
|
|
|
|
const QuaternionAligned* RESTRICT c,
|
|
|
|
|
const QuaternionAligned* RESTRICT d)
|
|
|
|
|
{
|
|
|
|
|
LoadAndSwizzleAligned(a->Base(), b->Base(), c->Base(), d->Base());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// LoadAndSwizzleAligned - load 4 consecutive QuaternionAligneds into a FourQuaternions,
|
|
|
|
|
/// performing transpose op.
|
|
|
|
|
/// all 4 vectors must be 128 bit boundary
|
|
|
|
|
FORCEINLINE void LoadAndSwizzleAligned(const QuaternionAligned* qs)
|
|
|
|
|
{
|
|
|
|
|
#if defined( _X360 )
|
|
|
|
|
fltx4 tx = LoadAlignedSIMD(qs++);
|
|
|
|
|
fltx4 ty = LoadAlignedSIMD(qs++);
|
|
|
|
|
fltx4 tz = LoadAlignedSIMD(qs++);
|
|
|
|
|
fltx4 tw = LoadAlignedSIMD(qs);
|
|
|
|
|
fltx4 r0 = __vmrghw(tx, tz);
|
|
|
|
|
fltx4 r1 = __vmrghw(ty, tw);
|
|
|
|
|
fltx4 r2 = __vmrglw(tx, tz);
|
|
|
|
|
fltx4 r3 = __vmrglw(ty, tw);
|
|
|
|
|
|
|
|
|
|
x = __vmrghw(r0, r1);
|
|
|
|
|
y = __vmrglw(r0, r1);
|
|
|
|
|
z = __vmrghw(r2, r3);
|
|
|
|
|
w = __vmrglw(r2, r3);
|
|
|
|
|
#else
|
|
|
|
|
x = LoadAlignedSIMD(qs++);
|
|
|
|
|
y = LoadAlignedSIMD(qs++);
|
|
|
|
|
z = LoadAlignedSIMD(qs++);
|
|
|
|
|
w = LoadAlignedSIMD(qs++);
|
|
|
|
|
// now, matrix is:
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
// x y z w
|
|
|
|
|
TransposeSIMD(x, y, z, w);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Store the FourQuaternions out to four nonconsecutive ordinary quaternions in memory.
|
|
|
|
|
FORCEINLINE void SwizzleAndStoreAligned(QuaternionAligned* a, QuaternionAligned* b, QuaternionAligned* c, QuaternionAligned* d)
|
|
|
|
|
{
|
|
|
|
|
#if defined( _X360 )
|
|
|
|
|
fltx4 r0 = __vmrghw(x, z);
|
|
|
|
|
fltx4 r1 = __vmrghw(y, w);
|
|
|
|
|
fltx4 r2 = __vmrglw(x, z);
|
|
|
|
|
fltx4 r3 = __vmrglw(y, w);
|
|
|
|
|
|
|
|
|
|
fltx4 rx = __vmrghw(r0, r1);
|
|
|
|
|
fltx4 ry = __vmrglw(r0, r1);
|
|
|
|
|
fltx4 rz = __vmrghw(r2, r3);
|
|
|
|
|
fltx4 rw = __vmrglw(r2, r3);
|
|
|
|
|
|
|
|
|
|
StoreAlignedSIMD(a, rx);
|
|
|
|
|
StoreAlignedSIMD(b, ry);
|
|
|
|
|
StoreAlignedSIMD(c, rz);
|
|
|
|
|
StoreAlignedSIMD(d, rw);
|
|
|
|
|
#else
|
|
|
|
|
fltx4 dupes[4] = { x, y, z, w };
|
|
|
|
|
TransposeSIMD(dupes[0], dupes[1], dupes[2], dupes[3]);
|
|
|
|
|
StoreAlignedSIMD(a, dupes[0]);
|
|
|
|
|
StoreAlignedSIMD(b, dupes[1]);
|
|
|
|
|
StoreAlignedSIMD(c, dupes[2]);
|
|
|
|
|
StoreAlignedSIMD(d, dupes[3]);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Store the FourQuaternions out to four consecutive ordinary quaternions in memory.
|
|
|
|
|
FORCEINLINE void SwizzleAndStoreAligned(QuaternionAligned* qs)
|
|
|
|
|
{
|
|
|
|
|
#if defined( _X360 )
|
|
|
|
|
fltx4 r0 = __vmrghw(x, z);
|
|
|
|
|
fltx4 r1 = __vmrghw(y, w);
|
|
|
|
|
fltx4 r2 = __vmrglw(x, z);
|
|
|
|
|
fltx4 r3 = __vmrglw(y, w);
|
|
|
|
|
|
|
|
|
|
fltx4 rx = __vmrghw(r0, r1);
|
|
|
|
|
fltx4 ry = __vmrglw(r0, r1);
|
|
|
|
|
fltx4 rz = __vmrghw(r2, r3);
|
|
|
|
|
fltx4 rw = __vmrglw(r2, r3);
|
|
|
|
|
|
|
|
|
|
StoreAlignedSIMD(qs, rx);
|
|
|
|
|
StoreAlignedSIMD(++qs, ry);
|
|
|
|
|
StoreAlignedSIMD(++qs, rz);
|
|
|
|
|
StoreAlignedSIMD(++qs, rw);
|
|
|
|
|
#else
|
|
|
|
|
SwizzleAndStoreAligned(qs, qs + 1, qs + 2, qs + 3);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Store the FourQuaternions out to four consecutive ordinary quaternions in memory.
|
|
|
|
|
// The mask specifies which of the quaternions are actually written out -- each
|
|
|
|
|
// word in the fltx4 should be all binary ones or zeros. Ones means the corresponding
|
|
|
|
|
// quat will be written.
|
|
|
|
|
FORCEINLINE void SwizzleAndStoreAlignedMasked(QuaternionAligned* RESTRICT qs, const bi32x4& controlMask)
|
|
|
|
|
{
|
|
|
|
|
fltx4 originals[4];
|
|
|
|
|
originals[0] = LoadAlignedSIMD(qs);
|
|
|
|
|
originals[1] = LoadAlignedSIMD(qs + 1);
|
|
|
|
|
originals[2] = LoadAlignedSIMD(qs + 2);
|
|
|
|
|
originals[3] = LoadAlignedSIMD(qs + 3);
|
|
|
|
|
|
|
|
|
|
bi32x4 masks[4] = { SplatXSIMD(controlMask),
|
|
|
|
|
SplatYSIMD(controlMask),
|
|
|
|
|
SplatZSIMD(controlMask),
|
|
|
|
|
SplatWSIMD(controlMask) };
|
|
|
|
|
|
|
|
|
|
#if defined( _X360 )
|
|
|
|
|
fltx4 r0 = __vmrghw(x, z);
|
|
|
|
|
fltx4 r1 = __vmrghw(y, w);
|
|
|
|
|
fltx4 r2 = __vmrglw(x, z);
|
|
|
|
|
fltx4 r3 = __vmrglw(y, w);
|
|
|
|
|
|
|
|
|
|
fltx4 rx = __vmrghw(r0, r1);
|
|
|
|
|
fltx4 ry = __vmrglw(r0, r1);
|
|
|
|
|
fltx4 rz = __vmrghw(r2, r3);
|
|
|
|
|
fltx4 rw = __vmrglw(r2, r3);
|
|
|
|
|
#else
|
|
|
|
|
fltx4 rx = x;
|
|
|
|
|
fltx4 ry = y;
|
|
|
|
|
fltx4 rz = z;
|
|
|
|
|
fltx4 rw = w;
|
|
|
|
|
TransposeSIMD(rx, ry, rz, rw);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
StoreAlignedSIMD(qs + 0, MaskedAssign(masks[0], rx, originals[0]));
|
|
|
|
|
StoreAlignedSIMD(qs + 1, MaskedAssign(masks[1], ry, originals[1]));
|
|
|
|
|
StoreAlignedSIMD(qs + 2, MaskedAssign(masks[2], rz, originals[2]));
|
|
|
|
|
StoreAlignedSIMD(qs + 3, MaskedAssign(masks[3], rw, originals[3]));
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::Conjugate() const
|
|
|
|
|
{
|
|
|
|
|
return FourQuaternions(NegSIMD(x), NegSIMD(y), NegSIMD(z), w);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const fltx4 Dot(const FourQuaternions& a, const FourQuaternions& b)
|
|
|
|
|
{
|
|
|
|
|
return
|
|
|
|
|
MaddSIMD(a.x, b.x,
|
|
|
|
|
MaddSIMD(a.y, b.y,
|
|
|
|
|
MaddSIMD(a.z, b.z, MulSIMD(a.w, b.w))
|
|
|
|
|
)
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions Madd(const FourQuaternions& a, const fltx4& scale, const FourQuaternions& c)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = MaddSIMD(a.x, scale, c.x);
|
|
|
|
|
ret.y = MaddSIMD(a.y, scale, c.y);
|
|
|
|
|
ret.z = MaddSIMD(a.z, scale, c.z);
|
|
|
|
|
ret.w = MaddSIMD(a.w, scale, c.w);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions Mul(const FourQuaternions& a, const fltx4& scale)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = MulSIMD(a.x, scale);
|
|
|
|
|
ret.y = MulSIMD(a.y, scale);
|
|
|
|
|
ret.z = MulSIMD(a.z, scale);
|
|
|
|
|
ret.w = MulSIMD(a.w, scale);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions Add(const FourQuaternions& a, const FourQuaternions& b)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = AddSIMD(a.x, b.x);
|
|
|
|
|
ret.y = AddSIMD(a.y, b.y);
|
|
|
|
|
ret.z = AddSIMD(a.z, b.z);
|
|
|
|
|
ret.w = AddSIMD(a.w, b.w);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions Sub(const FourQuaternions& a, const FourQuaternions& b)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = SubSIMD(a.x, b.x);
|
|
|
|
|
ret.y = SubSIMD(a.y, b.y);
|
|
|
|
|
ret.z = SubSIMD(a.z, b.z);
|
|
|
|
|
ret.w = SubSIMD(a.w, b.w);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions Neg(const FourQuaternions& q)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = NegSIMD(q.x);
|
|
|
|
|
ret.y = NegSIMD(q.y);
|
|
|
|
|
ret.z = NegSIMD(q.z);
|
|
|
|
|
ret.w = NegSIMD(q.w);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions MaskedAssign(const bi32x4& mask, const FourQuaternions& a, const FourQuaternions& b)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
ret.x = MaskedAssign(mask, a.x, b.x);
|
|
|
|
|
ret.y = MaskedAssign(mask, a.y, b.y);
|
|
|
|
|
ret.z = MaskedAssign(mask, a.z, b.z);
|
|
|
|
|
ret.w = MaskedAssign(mask, a.w, b.w);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#ifdef DIFFERENT_NATIVE_VECTOR_TYPES
|
|
|
|
|
FORCEINLINE const FourQuaternions MaskedAssign(const fltx4& mask, const FourQuaternions& a, const FourQuaternions& b)
|
|
|
|
|
{
|
|
|
|
|
return MaskedAssign((bi32x4)mask, a, b);
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions QuaternionAlign(const FourQuaternions& p, const FourQuaternions& q)
|
|
|
|
|
{
|
|
|
|
|
// decide if one of the quaternions is backwards
|
|
|
|
|
bi32x4 cmp = CmpLtSIMD(Dot(p, q), Four_Zeros);
|
|
|
|
|
return MaskedAssign(cmp, Neg(q), q);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE const FourQuaternions QuaternionNormalize(const FourQuaternions& q)
|
|
|
|
|
{
|
|
|
|
|
fltx4 radius = Dot(q, q);
|
|
|
|
|
bi32x4 mask = CmpEqSIMD(radius, Four_Zeros); // all ones iff radius = 0
|
|
|
|
|
fltx4 invRadius = ReciprocalSqrtSIMD(radius);
|
|
|
|
|
|
|
|
|
|
FourQuaternions ret = MaskedAssign(mask, q, Mul(q, invRadius));
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined(__SPU__)
|
|
|
|
|
FORCEINLINE FourQuaternions& FourQuaternions::FromAxisAndAngles(const fltx4& axis,
|
|
|
|
|
const float& angle0, const float& angle1, const float& angle2, const float& angle3)
|
|
|
|
|
{
|
|
|
|
|
return FromAxisAndAngles(axis, LoadGatherSIMD(angle0, angle1, angle2, angle3));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions& FourQuaternions::FromAxisAndAngles(const fltx4& axis,
|
|
|
|
|
const fltx4& angles)
|
|
|
|
|
{
|
|
|
|
|
// compute the half theta
|
|
|
|
|
fltx4 theta = MulSIMD(angles, Four_PointFives);
|
|
|
|
|
// compute the sine and cosine of each angle simultaneously
|
|
|
|
|
fltx4 vsines; fltx4 vcoses;
|
|
|
|
|
SinCosSIMD(vsines, vcoses, theta);
|
|
|
|
|
// now the sines and coses vectors contain the results for four angles.
|
|
|
|
|
// for each of the angles, splat them out and then swizzle together so
|
|
|
|
|
// as to get a < cos, sin, sin, sin > coefficient vector
|
|
|
|
|
|
|
|
|
|
x = MulSIMD(vsines, SplatXSIMD(axis)); // sin(t0) * x, sin(t1) * x, etc
|
|
|
|
|
y = MulSIMD(vsines, SplatYSIMD(axis));
|
|
|
|
|
z = MulSIMD(vsines, SplatZSIMD(axis));
|
|
|
|
|
w = vcoses;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// this = this * q;
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::Mul(FourQuaternions const& q) const
|
|
|
|
|
{
|
|
|
|
|
// W = w1w2 - x1x2 - y1y2 - z1z2
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
fltx4 signMask = LoadAlignedSIMD((float*)g_SIMD_signmask);
|
|
|
|
|
// as we do the multiplication, also do a dot product, so we know whether
|
|
|
|
|
// one of the quats is backwards and if we therefore have to negate at the end
|
|
|
|
|
fltx4 dotProduct = MulSIMD(w, q.w);
|
|
|
|
|
|
|
|
|
|
ret.w = MulSIMD(w, q.w); // W = w1w2
|
|
|
|
|
ret.x = MulSIMD(w, q.x); // X = w1x2
|
|
|
|
|
ret.y = MulSIMD(w, q.y); // Y = w1y2
|
|
|
|
|
ret.z = MulSIMD(w, q.z); // Z = w1z2
|
|
|
|
|
|
|
|
|
|
dotProduct = MaddSIMD(x, q.x, dotProduct);
|
|
|
|
|
ret.w = MsubSIMD(x, q.x, ret.w); // W = w1w2 - x1x2
|
|
|
|
|
ret.x = MaddSIMD(x, q.w, ret.x); // X = w1x2 + x1w2
|
|
|
|
|
ret.y = MsubSIMD(x, q.z, ret.y); // Y = w1y2 - x1z2
|
|
|
|
|
ret.z = MaddSIMD(x, q.y, ret.z); // Z = w1z2 + x1y2
|
|
|
|
|
|
|
|
|
|
dotProduct = MaddSIMD(y, q.y, dotProduct);
|
|
|
|
|
ret.w = MsubSIMD(y, q.y, ret.w); // W = w1w2 - x1x2 - y1y2
|
|
|
|
|
ret.x = MaddSIMD(y, q.z, ret.x); // X = w1x2 + x1w2 + y1z2
|
|
|
|
|
ret.y = MaddSIMD(y, q.w, ret.y); // Y = w1y2 - x1z2 + y1w2
|
|
|
|
|
ret.z = MsubSIMD(y, q.x, ret.z); // Z = w1z2 + x1y2 - y1x2
|
|
|
|
|
|
|
|
|
|
dotProduct = MaddSIMD(z, q.z, dotProduct);
|
|
|
|
|
ret.w = MsubSIMD(z, q.z, ret.w); // W = w1w2 - x1x2 - y1y2 - z1z2
|
|
|
|
|
ret.x = MsubSIMD(z, q.y, ret.x); // X = w1x2 + x1w2 + y1z2 - z1y2
|
|
|
|
|
ret.y = MaddSIMD(z, q.x, ret.y); // Y = w1y2 - x1z2 + y1w2 + z1x2
|
|
|
|
|
ret.z = MaddSIMD(z, q.w, ret.z); // Z = w1z2 + x1y2 - y1x2 + z1w2
|
|
|
|
|
|
|
|
|
|
fltx4 Zero = Four_Zeros;
|
|
|
|
|
bi32x4 control = CmpLtSIMD(dotProduct, Four_Zeros);
|
|
|
|
|
signMask = MaskedAssign(control, signMask, Zero); // negate quats where q1.q2 < 0
|
|
|
|
|
ret.w = XorSIMD(signMask, ret.w);
|
|
|
|
|
ret.x = XorSIMD(signMask, ret.x);
|
|
|
|
|
ret.y = XorSIMD(signMask, ret.y);
|
|
|
|
|
ret.z = XorSIMD(signMask, ret.z);
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2023-04-01 22:11:41 +02:00
|
|
|
|
FORCEINLINE void FourQuaternions::RotateFourVectors(FourVectors* /*RESTRICT*/ vecs) const /*RESTRICT*/
|
2022-07-08 00:55:01 +02:00
|
|
|
|
{
|
|
|
|
|
fltx4 tmpX, tmpY, tmpZ, tmpW;
|
|
|
|
|
fltx4 outX, outY, outZ;
|
|
|
|
|
|
|
|
|
|
tmpX = SubSIMD(MaddSIMD(w, vecs->x, MulSIMD(y, vecs->z)),
|
|
|
|
|
MulSIMD(z, vecs->y));
|
|
|
|
|
|
|
|
|
|
tmpY = SubSIMD(MaddSIMD(w, vecs->y, MulSIMD(z, vecs->x)),
|
|
|
|
|
MulSIMD(x, vecs->z));
|
|
|
|
|
|
|
|
|
|
tmpZ = SubSIMD(MaddSIMD(w, vecs->z, MulSIMD(x, vecs->y)),
|
|
|
|
|
MulSIMD(y, vecs->x));
|
|
|
|
|
|
|
|
|
|
tmpW = AddSIMD(MaddSIMD(x, vecs->x, MulSIMD(y, vecs->y)),
|
|
|
|
|
MulSIMD(z, vecs->z));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
outX = AddSIMD(SubSIMD(MaddSIMD(tmpW, x, MulSIMD(tmpX, w)),
|
|
|
|
|
MulSIMD(tmpY, z)),
|
|
|
|
|
MulSIMD(tmpZ, y));
|
|
|
|
|
|
|
|
|
|
outY = AddSIMD(SubSIMD(MaddSIMD(tmpW, y, MulSIMD(tmpY, w)),
|
|
|
|
|
MulSIMD(tmpZ, x)),
|
|
|
|
|
MulSIMD(tmpX, z));
|
|
|
|
|
|
|
|
|
|
outZ = AddSIMD(SubSIMD(MaddSIMD(tmpW, z, MulSIMD(tmpZ, w)),
|
|
|
|
|
MulSIMD(tmpX, y)),
|
|
|
|
|
MulSIMD(tmpY, x));
|
|
|
|
|
|
|
|
|
|
// although apparently redundant, assigning the results to intermediate local variables
|
|
|
|
|
// seems to improve code scheduling slightly in SN.
|
|
|
|
|
vecs->x = outX;
|
|
|
|
|
vecs->y = outY;
|
|
|
|
|
vecs->z = outZ;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
|
|
void QuaternionScale( const Quaternion &p, float t, Quaternion &q )
|
|
|
|
|
{
|
|
|
|
|
Assert( s_bMathlibInitialized );
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
float r;
|
|
|
|
|
|
|
|
|
|
// FIXME: nick, this isn't overly sensitive to accuracy, and it may be faster to
|
|
|
|
|
// use the cos part (w) of the quaternion (sin(omega)*N,cos(omega)) to figure the new scale.
|
|
|
|
|
float sinom = sqrt( DotProduct( &p.x, &p.x ) );
|
|
|
|
|
sinom = min( sinom, 1.f );
|
|
|
|
|
|
|
|
|
|
float sinsom = sin( asin( sinom ) * t );
|
|
|
|
|
|
|
|
|
|
t = sinsom / (sinom + FLT_EPSILON);
|
|
|
|
|
VectorScale( &p.x, t, &q.x );
|
|
|
|
|
|
|
|
|
|
// rescale rotation
|
|
|
|
|
r = 1.0f - sinsom * sinsom;
|
|
|
|
|
|
|
|
|
|
// Assert( r >= 0 );
|
|
|
|
|
if (r < 0.0f)
|
|
|
|
|
r = 0.0f;
|
|
|
|
|
r = sqrt( r );
|
|
|
|
|
|
|
|
|
|
// keep sign of rotation
|
|
|
|
|
if (p.w < 0)
|
|
|
|
|
q.w = -r;
|
|
|
|
|
else
|
|
|
|
|
q.w = r;
|
|
|
|
|
|
|
|
|
|
Assert( q.IsValid() );
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::ScaleAngle(const fltx4& scale) const
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
static const fltx4 OneMinusEpsilon = { 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f };
|
|
|
|
|
const fltx4 Zero = Four_Zeros;
|
|
|
|
|
fltx4 signMask = LoadAlignedSIMD((float*)g_SIMD_signmask);
|
|
|
|
|
// work out if there are any tiny scales or angles, which are unstable
|
|
|
|
|
bi32x4 tinyAngles = CmpGtSIMD(w, OneMinusEpsilon);
|
|
|
|
|
bi32x4 negativeRotations = CmpLtSIMD(w, Zero); // if any w's are <0, we will need to negate later down
|
|
|
|
|
|
|
|
|
|
// figure out the theta
|
|
|
|
|
fltx4 angles = ArcCosSIMD(w);
|
|
|
|
|
|
|
|
|
|
// test also if w > -1
|
|
|
|
|
fltx4 negativeWs = XorSIMD(signMask, w);
|
|
|
|
|
tinyAngles = OrSIMD(CmpGtSIMD(negativeWs, OneMinusEpsilon), tinyAngles);
|
|
|
|
|
|
|
|
|
|
// meanwhile start working on computing the dot product of the
|
|
|
|
|
// vector component, and trust in the scheduler to interleave them
|
|
|
|
|
fltx4 vLenSq = MulSIMD(x, x);
|
|
|
|
|
vLenSq = MaddSIMD(y, y, vLenSq);
|
|
|
|
|
vLenSq = MaddSIMD(z, z, vLenSq);
|
|
|
|
|
|
|
|
|
|
// scale the angles
|
|
|
|
|
angles = MulSIMD(angles, scale);
|
|
|
|
|
|
|
|
|
|
// clear out the sign mask where w>=0
|
|
|
|
|
signMask = MaskedAssign(negativeRotations, signMask, Zero);
|
|
|
|
|
|
|
|
|
|
// work out the new w component and vector length
|
|
|
|
|
fltx4 vLenRecip = ReciprocalSqrtSIMD(vLenSq); // interleave with Cos to hide latencies
|
|
|
|
|
fltx4 sine;
|
|
|
|
|
SinCosSIMD(sine, ret.w, angles);
|
|
|
|
|
ret.x = MulSIMD(x, vLenRecip); // renormalize so the vector length + w = 1
|
|
|
|
|
ret.y = MulSIMD(y, vLenRecip); // renormalize so the vector length + w = 1
|
|
|
|
|
ret.z = MulSIMD(z, vLenRecip); // renormalize so the vector length + w = 1
|
|
|
|
|
ret.x = MulSIMD(ret.x, sine);
|
|
|
|
|
ret.y = MulSIMD(ret.y, sine);
|
|
|
|
|
ret.z = MulSIMD(ret.z, sine);
|
|
|
|
|
|
|
|
|
|
// negate where necessary
|
|
|
|
|
ret.x = XorSIMD(ret.x, signMask);
|
|
|
|
|
ret.y = XorSIMD(ret.y, signMask);
|
|
|
|
|
ret.z = XorSIMD(ret.z, signMask);
|
|
|
|
|
ret.w = XorSIMD(ret.w, signMask);
|
|
|
|
|
|
|
|
|
|
// finally, toss results from where cos(theta) is close to 1 -- these are non rotations.
|
|
|
|
|
ret.x = MaskedAssign(tinyAngles, x, ret.x);
|
|
|
|
|
ret.y = MaskedAssign(tinyAngles, y, ret.y);
|
|
|
|
|
ret.z = MaskedAssign(tinyAngles, z, ret.z);
|
|
|
|
|
ret.w = MaskedAssign(tinyAngles, w, ret.w);
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
// Purpose: return = this * ( s * q )
|
|
|
|
|
// In other words, for a quaternion representing a rotation of angle theta, return
|
|
|
|
|
// one of angle s*theta
|
|
|
|
|
// s is four floats in a fltx4 -- one for each quaternion
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::MulAc(const fltx4& s, const FourQuaternions& q) const
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
void QuaternionMA( const Quaternion &p, float s, const Quaternion &q, Quaternion &qt )
|
|
|
|
|
{
|
|
|
|
|
Quaternion p1, q1;
|
|
|
|
|
|
|
|
|
|
QuaternionScale( q, s, q1 );
|
|
|
|
|
QuaternionMult( p, q1, p1 );
|
|
|
|
|
QuaternionNormalize( p1 );
|
|
|
|
|
qt[0] = p1[0];
|
|
|
|
|
qt[1] = p1[1];
|
|
|
|
|
qt[2] = p1[2];
|
|
|
|
|
qt[3] = p1[3];
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
return Mul(q.ScaleAngle(s));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::ScaleMul(const fltx4& s, const FourQuaternions& q) const
|
|
|
|
|
{
|
|
|
|
|
return ScaleAngle(s).Mul(q);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::Slerp(const FourQuaternions& originalto, const fltx4& t)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
static const fltx4 OneMinusEpsilon = { 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f };
|
|
|
|
|
|
|
|
|
|
// align if necessary.
|
|
|
|
|
|
|
|
|
|
// actually, before we even do that, start by computing the dot product of
|
|
|
|
|
// the quaternions. it has lots of dependent ops and we can sneak it into
|
|
|
|
|
// the pipeline bubbles as we figure out alignment. Of course we don't know
|
|
|
|
|
// yet if we need to realign, so compute them both -- there's plenty of
|
|
|
|
|
// space in the bubbles. They're roomy, those bubbles.
|
|
|
|
|
fltx4 cosineOmega;
|
2022-09-09 19:47:31 +02:00
|
|
|
|
#if 0 // Maybe I don't need to do alignment separately, using the xb360 technique...
|
2022-07-08 00:55:01 +02:00
|
|
|
|
FourQuaternions to;
|
|
|
|
|
{
|
|
|
|
|
fltx4 diffs[4], sums[4], originalToNeg[4];
|
|
|
|
|
fltx4 dotIfAligned, dotIfNotAligned;
|
|
|
|
|
|
|
|
|
|
// compute negations of the TO quaternion.
|
|
|
|
|
originalToNeg[0] = NegSIMD(originalto.x);
|
|
|
|
|
originalToNeg[1] = NegSIMD(originalto.y);
|
|
|
|
|
originalToNeg[2] = NegSIMD(originalto.z);
|
|
|
|
|
originalToNeg[3] = NegSIMD(originalto.w);
|
|
|
|
|
|
|
|
|
|
dotIfAligned = MulSIMD(x, originalto.x);
|
|
|
|
|
dotIfNotAligned = MulSIMD(x, originalToNeg[0]);
|
|
|
|
|
|
|
|
|
|
diffs[0] = SubSIMD(x, originalto.x);
|
|
|
|
|
diffs[1] = SubSIMD(y, originalto.y);
|
|
|
|
|
diffs[2] = SubSIMD(z, originalto.z);
|
|
|
|
|
diffs[3] = SubSIMD(w, originalto.w);
|
|
|
|
|
|
|
|
|
|
sums[0] = AddSIMD(x, originalto.x);
|
|
|
|
|
sums[1] = AddSIMD(y, originalto.y);
|
|
|
|
|
sums[2] = AddSIMD(z, originalto.z);
|
|
|
|
|
sums[3] = AddSIMD(w, originalto.w);
|
|
|
|
|
|
|
|
|
|
dotIfAligned = MaddSIMD(y, originalto.y, dotIfAligned);
|
|
|
|
|
dotIfNotAligned = MaddSIMD(y, originalToNeg[1], dotIfNotAligned);
|
|
|
|
|
|
|
|
|
|
fltx4 diffsDot, sumsDot;
|
|
|
|
|
|
|
|
|
|
diffsDot = MulSIMD(diffs[0], diffs[0]); // x^2
|
|
|
|
|
sumsDot = MulSIMD(sums[0], sums[0]); // x^2
|
|
|
|
|
// do some work on the dot products while letting the multiplies cook
|
|
|
|
|
dotIfAligned = MaddSIMD(z, originalto.z, dotIfAligned);
|
|
|
|
|
dotIfNotAligned = MaddSIMD(z, originalToNeg[2], dotIfNotAligned);
|
|
|
|
|
|
|
|
|
|
diffsDot = MaddSIMD(diffs[1], diffs[1], diffsDot); // x^2 + y^2
|
|
|
|
|
sumsDot = MaddSIMD(sums[1], sums[1], sumsDot);
|
|
|
|
|
diffsDot = MaddSIMD(diffs[2], diffs[2], diffsDot); // x^2 + y^2 + z^2
|
|
|
|
|
sumsDot = MaddSIMD(sums[2], sums[2], sumsDot);
|
|
|
|
|
diffsDot = MaddSIMD(diffs[3], diffs[3], diffsDot); // x^2 + y^2 + z^2 + w^2
|
|
|
|
|
sumsDot = MaddSIMD(sums[3], sums[3], sumsDot);
|
|
|
|
|
// do some work on the dot products while letting the multiplies cook
|
|
|
|
|
dotIfAligned = MaddSIMD(w, originalto.w, dotIfAligned);
|
|
|
|
|
dotIfNotAligned = MaddSIMD(w, originalToNeg[3], dotIfNotAligned);
|
|
|
|
|
|
|
|
|
|
// are the differences greater than the sums?
|
|
|
|
|
// if so, we need to negate that quaternion
|
|
|
|
|
fltx4 mask = CmpGtSIMD(diffsDot, sumsDot); // 1 for diffs>0 and 0 elsewhere
|
|
|
|
|
to.x = MaskedAssign(mask, originalToNeg[0], originalto.x);
|
|
|
|
|
to.y = MaskedAssign(mask, originalToNeg[1], originalto.y);
|
|
|
|
|
to.z = MaskedAssign(mask, originalToNeg[2], originalto.z);
|
|
|
|
|
to.w = MaskedAssign(mask, originalToNeg[3], originalto.w);
|
|
|
|
|
|
|
|
|
|
cosineOmega = MaskedAssign(mask, dotIfNotAligned, dotIfAligned);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// right, now to is aligned to be the short way round, and we computed
|
|
|
|
|
// the dot product while we were figuring all that out.
|
|
|
|
|
#else
|
|
|
|
|
const FourQuaternions& to = originalto;
|
|
|
|
|
cosineOmega = MulSIMD(x, to.x);
|
|
|
|
|
cosineOmega = MaddSIMD(y, to.y, cosineOmega);
|
|
|
|
|
cosineOmega = MaddSIMD(z, to.z, cosineOmega);
|
|
|
|
|
cosineOmega = MaddSIMD(w, to.w, cosineOmega);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
fltx4 Zero = Four_Zeros;
|
|
|
|
|
bi32x4 cosOmegaLessThanZero = CmpLtSIMD(cosineOmega, Zero);
|
|
|
|
|
// fltx4 shouldNegate = MaskedAssign(cosOmegaLessThanZero, Four_NegativeOnes , Four_Ones );
|
|
|
|
|
fltx4 signMask = LoadAlignedSIMD((float*)g_SIMD_signmask); // contains a one in the sign bit -- xor against a number to negate it
|
|
|
|
|
fltx4 sinOmega = Four_Ones;
|
|
|
|
|
|
|
|
|
|
// negate cosineOmega where necessary
|
|
|
|
|
cosineOmega = MaskedAssign(cosOmegaLessThanZero, XorSIMD(cosineOmega, signMask), cosineOmega);
|
|
|
|
|
fltx4 oneMinusT = SubSIMD(Four_Ones, t);
|
|
|
|
|
bi32x4 bCosOmegaLessThanOne = CmpLtSIMD(cosineOmega, OneMinusEpsilon); // we'll use this to mask out null slerps
|
|
|
|
|
|
|
|
|
|
// figure out the sin component of the diff quaternion.
|
|
|
|
|
// since sin^2(t) + cos^2(t) = 1...
|
|
|
|
|
sinOmega = MsubSIMD(cosineOmega, cosineOmega, sinOmega); // = 1 - cos^2(t) = sin^2(t)
|
|
|
|
|
fltx4 invSinOmega = ReciprocalSqrtSIMD(sinOmega); // 1/sin(t)
|
|
|
|
|
sinOmega = MulSIMD(sinOmega, invSinOmega); // = sin^2(t) / sin(t) = sin(t)
|
|
|
|
|
|
|
|
|
|
// use the arctangent technique to work out omega from tan^-1(sin/cos)
|
|
|
|
|
fltx4 omega = ArcTan2SIMD(sinOmega, cosineOmega);
|
|
|
|
|
|
|
|
|
|
// alpha = sin(omega * (1-T))/sin(omega)
|
|
|
|
|
// beta = sin(omega * T)/sin(omega)
|
|
|
|
|
fltx4 alpha = MulSIMD(omega, oneMinusT); // w(1-T)
|
|
|
|
|
fltx4 beta = MulSIMD(omega, t); // w(T)
|
|
|
|
|
signMask = MaskedAssign(cosOmegaLessThanZero, signMask, Zero);
|
|
|
|
|
|
|
|
|
|
alpha = SinSIMD(alpha); // sin(w(1-T))
|
|
|
|
|
beta = SinSIMD(beta); // sin(wT)
|
|
|
|
|
|
|
|
|
|
alpha = MulSIMD(alpha, invSinOmega);
|
|
|
|
|
beta = MulSIMD(beta, invSinOmega);
|
|
|
|
|
|
|
|
|
|
// depending on whether the dot product was less than zero, negate beta, or not
|
|
|
|
|
beta = XorSIMD(beta, signMask);
|
|
|
|
|
|
|
|
|
|
// mask out singularities (where omega = 1)
|
|
|
|
|
alpha = MaskedAssign(bCosOmegaLessThanOne, alpha, oneMinusT);
|
|
|
|
|
beta = MaskedAssign(bCosOmegaLessThanOne, beta, t);
|
|
|
|
|
|
|
|
|
|
ret.x = MulSIMD(x, alpha);
|
|
|
|
|
ret.y = MulSIMD(y, alpha);
|
|
|
|
|
ret.z = MulSIMD(z, alpha);
|
|
|
|
|
ret.w = MulSIMD(w, alpha);
|
|
|
|
|
|
|
|
|
|
ret.x = MaddSIMD(to.x, beta, ret.x);
|
|
|
|
|
ret.y = MaddSIMD(to.y, beta, ret.y);
|
|
|
|
|
ret.z = MaddSIMD(to.z, beta, ret.z);
|
|
|
|
|
ret.w = MaddSIMD(to.w, beta, ret.w);
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FORCEINLINE FourQuaternions FourQuaternions::SlerpNoAlign(const FourQuaternions& originalto, const fltx4& t)
|
|
|
|
|
{
|
|
|
|
|
FourQuaternions ret;
|
|
|
|
|
static const fltx4 OneMinusEpsilon = { 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f, 1.0f - 0.000001f };
|
|
|
|
|
|
|
|
|
|
// align if necessary.
|
|
|
|
|
|
|
|
|
|
// actually, before we even do that, start by computing the dot product of
|
|
|
|
|
// the quaternions. it has lots of dependent ops and we can sneak it into
|
|
|
|
|
// the pipeline bubbles as we figure out alignment. Of course we don't know
|
|
|
|
|
// yet if we need to realign, so compute them both -- there's plenty of
|
|
|
|
|
// space in the bubbles. They're roomy, those bubbles.
|
|
|
|
|
fltx4 cosineOmega;
|
|
|
|
|
|
|
|
|
|
const FourQuaternions& to = originalto;
|
|
|
|
|
cosineOmega = MulSIMD(x, to.x);
|
|
|
|
|
cosineOmega = MaddSIMD(y, to.y, cosineOmega);
|
|
|
|
|
cosineOmega = MaddSIMD(z, to.z, cosineOmega);
|
|
|
|
|
cosineOmega = MaddSIMD(w, to.w, cosineOmega);
|
|
|
|
|
|
|
|
|
|
fltx4 sinOmega = Four_Ones;
|
|
|
|
|
|
|
|
|
|
fltx4 oneMinusT = SubSIMD(Four_Ones, t);
|
|
|
|
|
bi32x4 bCosOmegaLessThanOne = CmpLtSIMD(cosineOmega, OneMinusEpsilon); // we'll use this to mask out null slerps
|
|
|
|
|
|
|
|
|
|
// figure out the sin component of the diff quaternion.
|
|
|
|
|
// since sin^2(t) + cos^2(t) = 1...
|
|
|
|
|
sinOmega = MsubSIMD(cosineOmega, cosineOmega, sinOmega); // = 1 - cos^2(t) = sin^2(t)
|
|
|
|
|
fltx4 invSinOmega = ReciprocalSqrtSIMD(sinOmega); // 1/sin(t)
|
|
|
|
|
sinOmega = MulSIMD(sinOmega, invSinOmega); // = sin^2(t) / sin(t) = sin(t)
|
|
|
|
|
|
|
|
|
|
// use the arctangent technique to work out omega from tan^-1(sin/cos)
|
|
|
|
|
fltx4 omega = ArcTan2SIMD(sinOmega, cosineOmega);
|
|
|
|
|
|
|
|
|
|
// alpha = sin(omega * (1-T))/sin(omega)
|
|
|
|
|
// beta = sin(omega * T)/sin(omega)
|
|
|
|
|
fltx4 alpha = MulSIMD(omega, oneMinusT); // w(1-T)
|
|
|
|
|
fltx4 beta = MulSIMD(omega, t); // w(T)
|
|
|
|
|
alpha = SinSIMD(alpha); // sin(w(1-T))
|
|
|
|
|
beta = SinSIMD(beta); // sin(wT)
|
|
|
|
|
alpha = MulSIMD(alpha, invSinOmega);
|
|
|
|
|
beta = MulSIMD(beta, invSinOmega);
|
|
|
|
|
|
|
|
|
|
// mask out singularities (where omega = 1)
|
|
|
|
|
alpha = MaskedAssign(bCosOmegaLessThanOne, alpha, oneMinusT);
|
|
|
|
|
beta = MaskedAssign(bCosOmegaLessThanOne, beta, t);
|
|
|
|
|
|
|
|
|
|
ret.x = MulSIMD(x, alpha);
|
|
|
|
|
ret.y = MulSIMD(y, alpha);
|
|
|
|
|
ret.z = MulSIMD(z, alpha);
|
|
|
|
|
ret.w = MulSIMD(w, alpha);
|
|
|
|
|
|
|
|
|
|
ret.x = MaddSIMD(to.x, beta, ret.x);
|
|
|
|
|
ret.y = MaddSIMD(to.y, beta, ret.y);
|
|
|
|
|
ret.z = MaddSIMD(to.z, beta, ret.z);
|
|
|
|
|
ret.w = MaddSIMD(to.w, beta, ret.w);
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/***** removed because one of the SWIG permutations doesn't include ssequaternion.h, causing a missing symbol on this function:
|
|
|
|
|
inline void FourVectors::RotateBy( const FourQuaternions &quats )
|
|
|
|
|
{
|
|
|
|
|
quats.RotateFourVectors( this );
|
|
|
|
|
}
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
2022-07-06 21:11:32 +02:00
|
|
|
|
#endif // SSEQUATMATH_H
|
|
|
|
|
|
2022-07-08 00:55:01 +02:00
|
|
|
|
|