55 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
56 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
63 #include <immintrin.h>
/*
 * Aligned AVX2 kernel (listing excerpt: the embedded line numbers skip, so
 * the return type, part of the signature, braces and some statements are not
 * shown here).
 *
 * Visible behavior: each vector iteration reads 32 bytes of int8 input,
 * shuffles them, sign-extends bytes to int32, converts to float, multiplies
 * by 1/scalar and writes two groups of 8 floats to the output.
 *
 * iBuffer:    output float buffer, written with _mm256_store_ps (needs a
 *             32-byte aligned address).
 * num_points: number of points to produce; 16 are handled per vector
 *             iteration, the remainder by the scalar tail loop.
 * complexVector and scalar are used below but their declarations are not
 * visible in this excerpt.
 */
66 volk_8ic_s32f_deinterleave_real_32f_a_avx2(
float* iBuffer,
69 unsigned int num_points)
71 float* iBufferPtr = iBuffer;
73 unsigned int number = 0;
// Number of full 16-point groups the vector loop can process.
74 const unsigned int sixteenthPoints = num_points / 16;
// Reciprocal computed once (in double, then narrowed) so the loops multiply
// instead of dividing.
77 const float iScalar = 1.0 / scalar;
78 __m256 invScalar = _mm256_set1_ps(iScalar);
79 __m256i complexVal, iIntVal;
// The input is walked byte by byte through an int8_t pointer.
80 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Shuffle control for _mm256_shuffle_epi8; the remaining arguments are cut
// off in this excerpt. NOTE(review): presumably it packs the even-indexed
// bytes of each 128-bit lane into that lane's low 8 bytes -- confirm against
// the full source.
82 __m256i moveMask = _mm256_set_epi8(0x80,
114 for (; number < sixteenthPoints; number++) {
// Aligned 32-byte load: the address must be 32-byte aligned.
115 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
116 complexVectorPtr += 32;
117 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
// Sign-extend the low 8 bytes of the low 128-bit lane to eight int32 values.
119 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
120 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
121 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
// The advance of iBufferPtr after this store is not visible here (the
// listing numbers jump from 122 to 125).
122 _mm256_store_ps(iBufferPtr, iFloatValue);
// Immediate 0b11000110 selects qwords (2, 1, 0, 3): it swaps 64-bit elements
// 0 and 2, moving the low qword of the upper lane into position 0 so the
// same low-lane conversion can be reused for the second group.
125 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
126 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
127 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
128 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
129 _mm256_store_ps(iBufferPtr, iFloatValue);
// Scalar tail: restart the counter at the first point the vector loop did
// not cover.
133 number = sixteenthPoints * 16;
134 for (; number < num_points; number++) {
// Converts one int8 to float and scales it. Only a single input-pointer
// advance is visible on this line; the listing skips lines right after it,
// so any additional advance is not shown -- TODO confirm against full source.
135 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
142 #ifdef LV_HAVE_SSE4_1
143 #include <smmintrin.h>
/*
 * Aligned SSE4.1 kernel (listing excerpt: the embedded line numbers skip, so
 * the return type, part of the signature, braces and some statements are not
 * shown here).
 *
 * Visible behavior: each vector iteration reads 16 bytes of int8 input, keeps
 * the even-indexed bytes, sign-extends them to int32, converts to float,
 * multiplies by 1/scalar and writes two groups of 4 floats to the output.
 *
 * iBuffer:    output float buffer, written with _mm_store_ps (needs a
 *             16-byte aligned address).
 * num_points: number of points to produce; 8 are handled per vector
 *             iteration, the remainder by the scalar tail loop.
 * complexVector and scalar are used below but their declarations are not
 * visible in this excerpt.
 */
146 volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(
float* iBuffer,
149 unsigned int num_points)
151 float* iBufferPtr = iBuffer;
153 unsigned int number = 0;
// Number of full 8-point groups the vector loop can process.
154 const unsigned int eighthPoints = num_points / 8;
// Reciprocal computed once so the loops multiply instead of dividing.
157 const float iScalar = 1.0 / scalar;
158 __m128 invScalar = _mm_set_ps1(iScalar);
159 __m128i complexVal, iIntVal;
160 int8_t* complexVectorPtr = (int8_t*)complexVector;
// _mm_set_epi8 lists bytes from highest to lowest: result bytes 0..7 take
// source bytes 0, 2, 4, ..., 14; a control byte of 0x80 zeroes result bytes
// 8..15.
162 __m128i moveMask = _mm_set_epi8(
163 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
165 for (; number < eighthPoints; number++) {
// Aligned 16-byte load: the address must be 16-byte aligned.
166 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
167 complexVectorPtr += 16;
168 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
// Sign-extend the lowest 4 packed bytes to four int32 values.
170 iIntVal = _mm_cvtepi8_epi32(complexVal);
171 iFloatValue = _mm_cvtepi32_ps(iIntVal);
173 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
// The advance of iBufferPtr after this store is not visible here (the
// listing numbers jump from 175 to 179).
175 _mm_store_ps(iBufferPtr, iFloatValue);
// Byte-shift right by 4 so the next four packed bytes land in the low
// positions for the second conversion.
179 complexVal = _mm_srli_si128(complexVal, 4);
180 iIntVal = _mm_cvtepi8_epi32(complexVal);
181 iFloatValue = _mm_cvtepi32_ps(iIntVal);
183 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
185 _mm_store_ps(iBufferPtr, iFloatValue);
// Scalar tail: restart the counter at the first point the vector loop did
// not cover.
190 number = eighthPoints * 8;
191 for (; number < num_points; number++) {
// Converts one int8 to float and scales it. Only a single input-pointer
// advance is visible on this line; the listing skips lines right after it,
// so any additional advance is not shown -- TODO confirm against full source.
192 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
200 #include <xmmintrin.h>
/*
 * Aligned SSE kernel body (listing excerpt: the start of the signature, the
 * braces and the declarations of floatBuffer and iValue are not shown here).
 *
 * Visible behavior: each vector iteration picks four int8 values spaced two
 * bytes apart, converts them to float through a small staging array, scales
 * them by 1/scalar with one SSE multiply and stores 4 floats to the output.
 *
 * num_points: number of points to produce; 4 are handled per vector
 *             iteration, the remainder by the scalar tail loop.
 */
206 unsigned int num_points)
208 float* iBufferPtr = iBuffer;
210 unsigned int number = 0;
// Number of full 4-point groups the vector loop can process.
211 const unsigned int quarterPoints = num_points / 4;
// Reciprocal computed once so the loops multiply instead of dividing.
214 const float iScalar = 1.0 / scalar;
215 __m128 invScalar = _mm_set_ps1(iScalar);
216 int8_t* complexVectorPtr = (int8_t*)complexVector;
220 for (; number < quarterPoints; number++) {
// Take every second byte (stride of 2) and widen it to float in scalar code.
221 floatBuffer[0] = (float)(*complexVectorPtr);
222 complexVectorPtr += 2;
223 floatBuffer[1] = (float)(*complexVectorPtr);
224 complexVectorPtr += 2;
225 floatBuffer[2] = (float)(*complexVectorPtr);
226 complexVectorPtr += 2;
227 floatBuffer[3] = (float)(*complexVectorPtr);
228 complexVectorPtr += 2;
// _mm_load_ps needs a 16-byte aligned source. NOTE(review): floatBuffer's
// declaration is not visible in this excerpt -- confirm it is declared with
// 16-byte alignment.
230 iValue = _mm_load_ps(floatBuffer);
232 iValue = _mm_mul_ps(iValue, invScalar);
// Aligned store; the advance of iBufferPtr after it is not visible here
// (the listing numbers jump from 234 to 239).
234 _mm_store_ps(iBufferPtr, iValue);
// Scalar tail: restart the counter at the first point the vector loop did
// not cover.
239 number = quarterPoints * 4;
240 for (; number < num_points; number++) {
// Converts one int8 to float and scales it. Only a single input-pointer
// advance is visible on this line; the listing skips lines right after it,
// so any additional advance is not shown -- TODO confirm against full source.
241 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
248 #ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel body (listing excerpt: the start of the signature
 * and the braces are not shown here).
 *
 * Visible behavior: for each of num_points iterations, reads one int8 from
 * the input, converts it to float, multiplies it by 1/scalar and appends the
 * result to iBuffer.
 */
254 unsigned int num_points)
256 unsigned int number = 0;
// Read-only byte view of the input.
257 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
258 float* iBufferPtr = iBuffer;
// Reciprocal computed once so the loop multiplies instead of dividing.
259 const float invScalar = 1.0 / scalar;
260 for (number = 0; number < num_points; number++) {
// Only a single input-pointer advance is visible on this line; the listing
// skips lines right after it, so any additional advance is not shown --
// TODO confirm against full source.
261 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
270 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
271 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
273 #include <inttypes.h>
278 #include <immintrin.h>
/*
 * Unaligned AVX2 kernel (listing excerpt: the embedded line numbers skip, so
 * the return type, part of the signature, braces, the declarations of
 * hcomplexVal and iFloatValue, and some statements are not shown here).
 *
 * Visible behavior: each vector iteration reads 32 bytes of int8 input with
 * an unaligned load, shuffles them, then for each 128-bit half sign-extends
 * bytes to int32, converts to float, multiplies by 1/scalar and writes 8
 * floats with an unaligned store.
 *
 * iBuffer:    output float buffer; written with _mm256_storeu_ps, so no
 *             alignment is required by the stores.
 * num_points: number of points to produce; 16 are handled per vector
 *             iteration, the remainder by the scalar tail loop.
 * complexVector and scalar are used below but their declarations are not
 * visible in this excerpt.
 */
281 volk_8ic_s32f_deinterleave_real_32f_u_avx2(
float* iBuffer,
284 unsigned int num_points)
286 float* iBufferPtr = iBuffer;
288 unsigned int number = 0;
// Number of full 16-point groups the vector loop can process.
289 const unsigned int sixteenthPoints = num_points / 16;
// Reciprocal computed once so the loops multiply instead of dividing.
292 const float iScalar = 1.0 / scalar;
293 __m256 invScalar = _mm256_set1_ps(iScalar);
294 __m256i complexVal, iIntVal;
296 int8_t* complexVectorPtr = (int8_t*)complexVector;
// Shuffle control for _mm256_shuffle_epi8; the remaining arguments are cut
// off in this excerpt. NOTE(review): presumably it packs the even-indexed
// bytes of each 128-bit lane into that lane's low 8 bytes -- confirm against
// the full source.
298 __m256i moveMask = _mm256_set_epi8(0x80,
331 for (; number < sixteenthPoints; number++) {
// Unaligned 32-byte load.
332 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
333 complexVectorPtr += 32;
334 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
// Lower 128-bit half: sign-extend its low 8 bytes to eight int32 values.
336 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
337 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
338 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
340 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
// The advance of iBufferPtr after this store is not visible here (the
// listing numbers jump from 342 to 346).
342 _mm256_storeu_ps(iBufferPtr, iFloatValue);
// Upper 128-bit half, processed the same way.
346 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
347 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
348 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
350 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
352 _mm256_storeu_ps(iBufferPtr, iFloatValue);
// Scalar tail: restart the counter at the first point the vector loop did
// not cover.
357 number = sixteenthPoints * 16;
358 for (; number < num_points; number++) {
// Converts one int8 to float and scales it. Only a single input-pointer
// advance is visible on this line; the excerpt ends right after it, so any
// additional advance is not shown -- TODO confirm against full source.
359 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:251
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:203
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61