Fix clang-cl compile

Temporarily commented out SDL2 audio SSE code, as some of it doesn't work properly with clang-cl
Kawe Mazidjatari 2022-03-27 22:18:58 +02:00
parent b1a806dccb
commit b405125357
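
An alternative to line-commenting the loops, sketched here only as an illustration of the same workaround (it is not what this commit does): gate the SSE paths on the compiler. clang-cl defines both __clang__ and _MSC_VER, so a guard along these lines would keep the intrinsics for plain MSVC builds. R5_AUDIO_SSE_OK is a hypothetical macro, not an SDL or repository symbol:

/* Hypothetical compile-time switch; not part of this commit or of SDL. */
#if defined(__clang__) && defined(_MSC_VER)
#define R5_AUDIO_SSE_OK 0   /* clang-cl: skip the SSE intrinsics for now */
#else
#define R5_AUDIO_SSE_OK 1
#endif

The while (i >= 4) and while (i >= 2) blocks below could then be wrapped in #if R5_AUDIO_SSE_OK ... #endif, which is easier to revert than per-line comments.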


@@ -79,10 +79,10 @@ SDL_ConvertStereoToMono_SSE3(SDL_AudioCVT * cvt, SDL_AudioFormat format)
     /* Do SSE blocks as long as we have 16 bytes available.
        Just use unaligned load/stores, if the memory at runtime is
        aligned it'll be just as fast on modern processors */
-    while (i >= 4) {    /* 4 * float32 */
-        _mm_storeu_ps(dst, _mm_mul_ps(_mm_hadd_ps(_mm_load_ps(src), _mm_loadu_ps(src+4)), divby2));
-        i -= 4; src += 8; dst += 4;
-    }
+    //while (i >= 4) {    /* 4 * float32 */
+    //    _mm_storeu_ps(dst, _mm_mul_ps(_mm_hadd_ps(_mm_load_ps(src), _mm_loadu_ps(src+4)), divby2));
+    //    i -= 4; src += 8; dst += 4;
+    //}
     /* Finish off any leftovers with scalar operations. */
     while (i) {
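
With the SSE3 block commented out, the scalar tail loop above now converts every sample: each mono output is the average of one stereo (L, R) pair. A minimal sketch of that math, using assumed names (stereo_to_mono_scalar is illustrative, not an SDL function):

/* Illustrative scalar equivalent of the disabled SSE3 loop. */
static void stereo_to_mono_scalar(float *dst, const float *src, int i)
{
    while (i) {                           /* i = remaining mono samples */
        *dst = (src[0] + src[1]) * 0.5f;  /* (L + R) / 2 */
        i--; src += 2; dst++;
    }
}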
@@ -202,31 +202,31 @@ SDL_Convert51ToStereo_SSE(SDL_AudioCVT * cvt, SDL_AudioFormat format)
     /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
     /* Just use unaligned load/stores, if the memory at runtime is */
     /* aligned it'll be just as fast on modern processors */
-    while (i >= 2) {
-        /* Two 5.1 samples (12 floats) fit nicely in three 128bit */
-        /* registers. Using shuffles they can be rearranged so that */
-        /* the conversion math can be vectorized. */
-        __m128 in0 = _mm_loadu_ps(src);     /* 0FL 0FR 0FC 0LF */
-        __m128 in1 = _mm_loadu_ps(src + 4); /* 0BL 0BR 1FL 1FR */
-        __m128 in2 = _mm_loadu_ps(src + 8); /* 1FC 1LF 1BL 1BR */
-        /* 0FC 0FC 1FC 1FC */
-        __m128 fc_distributed = _mm_mul_ps(half, _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(0, 0, 2, 2)));
-        /* 0FL 0FR 1BL 1BR */
-        __m128 blended = _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(3, 2, 1, 0));
-        /*   0FL 0FR 1BL 1BR */
-        /* + 0BL 0BR 1FL 1FR */
-        /* =  0L  0R  1L  1R */
-        __m128 out = _mm_add_ps(blended, in1);
-        out = _mm_add_ps(out, fc_distributed);
-        out = _mm_mul_ps(out, two_fifths_v);
-        _mm_storeu_ps(dst, out);
-        i -= 2; src += 12; dst += 4;
-    }
+    //while (i >= 2) {
+    //    /* Two 5.1 samples (12 floats) fit nicely in three 128bit */
+    //    /* registers. Using shuffles they can be rearranged so that */
+    //    /* the conversion math can be vectorized. */
+    //    __m128 in0 = _mm_loadu_ps(src);     /* 0FL 0FR 0FC 0LF */
+    //    __m128 in1 = _mm_loadu_ps(src + 4); /* 0BL 0BR 1FL 1FR */
+    //    __m128 in2 = _mm_loadu_ps(src + 8); /* 1FC 1LF 1BL 1BR */
+    //    /* 0FC 0FC 1FC 1FC */
+    //    __m128 fc_distributed = _mm_mul_ps(half, _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(0, 0, 2, 2)));
+    //    /* 0FL 0FR 1BL 1BR */
+    //    __m128 blended = _mm_shuffle_ps(in0, in2, _MM_SHUFFLE(3, 2, 1, 0));
+    //    /*   0FL 0FR 1BL 1BR */
+    //    /* + 0BL 0BR 1FL 1FR */
+    //    /* =  0L  0R  1L  1R */
+    //    __m128 out = _mm_add_ps(blended, in1);
+    //    out = _mm_add_ps(out, fc_distributed);
+    //    out = _mm_mul_ps(out, two_fifths_v);
+    //    _mm_storeu_ps(dst, out);
+    //    i -= 2; src += 12; dst += 4;
+    //}
     /* Finish off any leftovers with scalar operations. */
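
Likewise, the disabled vector loop's downmix reduces to simple per-frame math: each stereo output channel sums its front channel, its back channel, and half of the front center, then scales by 2/5 (the two_fifths_v constant above); the LFE channel is dropped. A scalar sketch with assumed names (downmix_51_to_stereo_scalar is illustrative, not an SDL function):

/* Illustrative scalar equivalent of the disabled 5.1-to-stereo loop.
   SDL 5.1 frame order: FL FR FC LFE BL BR. */
static void downmix_51_to_stereo_scalar(float *dst, const float *src, int i)
{
    while (i) {                               /* i = remaining 5.1 frames */
        const float fc_half = src[2] * 0.5f;  /* distribute FC to both sides */
        dst[0] = (src[0] + fc_half + src[4]) * 0.4f;  /* L = (FL + FC/2 + BL) * 2/5 */
        dst[1] = (src[1] + fc_half + src[5]) * 0.4f;  /* R = (FR + FC/2 + BR) * 2/5 */
        i--; src += 6; dst += 2;              /* src[3] (LFE) is dropped */
    }
}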