Utilize SIMD for 'DrawNavMeshPortals'

* Use SIMD to pack the padded vertex Z values (va/vb, -/+ padz) into a single XMM register.
* Use SIMD to construct origin and destination vectors from vertices.
* Dereference 'navmesh_debug_camera_range' only once, outside the render loop.
* Light code cleanup.
This commit is contained in:
Kawe Mazidjatari 2022-09-29 22:26:27 +02:00
parent f06c9a01e2
commit e111587dd7

View File

@ -136,10 +136,10 @@ void DrawNavMeshPortals()
if (!mesh) if (!mesh)
return; return;
const bool bUseDepthBuffer = r_debug_overlay_zbuffer->GetBool(); const bool zbuffer = r_debug_overlay_zbuffer->GetBool();
OverlayBox_t::Transforms vTransforms; const float range = navmesh_debug_camera_range->GetFloat();
for (int i = navmesh_draw_portal->GetInt(); i < mesh->getTileCount(); ++i) for (int i = navmesh_draw_portal->GetInt(), j = mesh->getTileCount(); i < j; ++i)
{ {
if (navmesh_debug_tile_range->GetBool()) if (navmesh_debug_tile_range->GetBool())
{ {
@ -151,12 +151,12 @@ void DrawNavMeshPortals()
if (!tile->header) if (!tile->header)
continue; continue;
if (navmesh_debug_camera_range->GetBool()) if (range > 0)
{ {
const Vector3D vCamera = MainViewOrigin(); const Vector3D camera = MainViewOrigin();
if (vCamera.DistTo(Vector3D(tile->header->bmin[0], tile->header->bmin[1], vCamera.z)) > navmesh_debug_camera_range->GetFloat() || if (camera.DistTo(Vector3D(tile->header->bmin[0], tile->header->bmin[1], camera.z)) > range ||
vCamera.DistTo(Vector3D(tile->header->bmax[0], tile->header->bmax[1], vCamera.z)) > navmesh_debug_camera_range->GetFloat()) camera.DistTo(Vector3D(tile->header->bmax[0], tile->header->bmax[1], camera.z)) > range)
continue; continue;
} }
@ -167,41 +167,76 @@ void DrawNavMeshPortals()
for (int side = 0; side < 8; ++side) for (int side = 0; side < 8; ++side)
{ {
unsigned short m = DT_EXT_LINK | static_cast<unsigned short>(side); unsigned short m = DT_EXT_LINK | static_cast<unsigned short>(side);
for (int i = 0; i < tile->header->polyCount; ++i) for (int k = 0, e = tile->header->polyCount; k < e; ++k)
{ {
const dtPoly* poly = &tile->polys[i]; const dtPoly* poly = &tile->polys[k];
// Create new links. // Create new links.
const int nv = poly->vertCount; const int nv = poly->vertCount;
for (int j = 0; j < nv; ++j) for (int v = 0; v < nv; ++v)
{ {
// Skip edges which do not point to the right side. // Skip edges which do not point to the right side.
if (poly->neis[j] != m) if (poly->neis[v] != m)
continue; continue;
// Create new links // Create new links
const float* va = &tile->verts[poly->verts[j] * 3]; const float* va = &tile->verts[poly->verts[v] * 3];
const float* vb = &tile->verts[poly->verts[(j + 1) % nv] * 3]; const float* vb = &tile->verts[poly->verts[(v + 1) % nv] * 3];
/*****************
Vertex indices:
va - = 0 +------+
vb - = 1 | |
va + = 2 | |
vb + = 3 +------+
*****************/
__m128 verts = _mm_setr_ps(va[2], vb[2], va[2], vb[2]);
verts = _mm_sub_ps(verts, _mm_setr_ps(padz, padz, 0.0f, 0.0f));
verts = _mm_add_ps(verts, _mm_setr_ps(0.0f, 0.0f, padz, padz));
if (side == 0 || side == 4) if (side == 0 || side == 4)
{ {
Color col = side == 0 ? Color(188, 0, 0, 255) : Color(188, 0, 188, 255); Color col = side == 0 ? Color(188, 0, 0, 255) : Color(188, 0, 188, 255);
const float x = va[0] + ((side == 0) ? -padx : padx); const float x = va[0] + ((side == 0) ? -padx : padx);
v_RenderLine(Vector3D(x, va[1], va[2] - padz), Vector3D(x, va[1], va[2] + padz), col, r_debug_overlay_zbuffer->GetBool()); __m128 origin = _mm_setr_ps(x, va[1], verts.m128_f32[0], 0);
v_RenderLine(Vector3D(x, va[1], va[2] + padz), Vector3D(x, vb[1], vb[2] + padz), col, r_debug_overlay_zbuffer->GetBool()); __m128 dest = _mm_setr_ps(x, va[1], verts.m128_f32[2], 0);
v_RenderLine(Vector3D(x, vb[1], vb[2] + padz), Vector3D(x, vb[1], vb[2] - padz), col, r_debug_overlay_zbuffer->GetBool()); v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
v_RenderLine(Vector3D(x, vb[1], vb[2] - padz), Vector3D(x, va[1], va[2] - padz), col, r_debug_overlay_zbuffer->GetBool()); *reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(x, va[1], verts.m128_f32[2], 0);
dest = _mm_setr_ps(x, vb[1], verts.m128_f32[3], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(x, vb[1], verts.m128_f32[3], 0);
dest = _mm_setr_ps(x, vb[1], verts.m128_f32[1], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(x, vb[1], verts.m128_f32[1], 0);
dest = _mm_setr_ps(x, va[1], verts.m128_f32[0], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
} }
else if (side == 2 || side == 6) else if (side == 2 || side == 6)
{ {
Color col = side == 2 ? Color(0, 188, 0, 255) : Color(188, 188, 0, 255); Color col = side == 2 ? Color(0, 188, 0, 255) : Color(188, 188, 0, 255);
const float y = va[1] + ((side == 2) ? -padx : padx); const float y = va[1] + ((side == 2) ? -padx : padx);
v_RenderLine(Vector3D(va[0], y, va[2] - padz), Vector3D(va[0], y, va[2] + padz), col, r_debug_overlay_zbuffer->GetBool()); __m128 origin = _mm_setr_ps(va[0], y, verts.m128_f32[0], 0);
v_RenderLine(Vector3D(va[0], y, va[2] + padz), Vector3D(vb[0], y, vb[2] + padz), col, r_debug_overlay_zbuffer->GetBool()); __m128 dest = _mm_setr_ps(va[0], y, verts.m128_f32[2], 0);
v_RenderLine(Vector3D(vb[0], y, vb[2] + padz), Vector3D(vb[0], y, vb[2] - padz), col, r_debug_overlay_zbuffer->GetBool()); v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
v_RenderLine(Vector3D(vb[0], y, vb[2] - padz), Vector3D(va[0], y, va[2] - padz), col, r_debug_overlay_zbuffer->GetBool()); *reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(va[0], y, verts.m128_f32[2], 0);
dest = _mm_setr_ps(vb[0], y, verts.m128_f32[3], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(vb[0], y, verts.m128_f32[3], 0);
dest = _mm_setr_ps(vb[0], y, verts.m128_f32[1], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
origin = _mm_setr_ps(vb[0], y, verts.m128_f32[1], 0);
dest = _mm_setr_ps(va[0], y, verts.m128_f32[0], 0);
v_RenderLine(*reinterpret_cast<Vector3D*>(&origin),
*reinterpret_cast<Vector3D*>(&dest), col, zbuffer);
} }
} }
} }