54 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
55 #define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
60 #include <immintrin.h>
/*
 * AVX2 kernel, aligned variant: splits the interleaved 16-bit samples of
 * complexVector into iBuffer (first int16 of each point) and qBuffer (second
 * int16 of each point), exactly as the scalar tail loop at the bottom does.
 *
 * The vector loop uses _mm256_load_si256 / _mm256_store_si256, which require
 * 32-byte-aligned addresses.
 *
 * NOTE(review): this listing has gaps in its line numbering (the remaining
 * parameters, most MoveMask bytes, the permute2x128 selector and the closing
 * braces are not shown) - confirm details against the full header.
 */
62 static inline void volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
65 unsigned int num_points)
67 unsigned int number = 0;
/* Byte-granular read cursor so the vector loop can step 32 bytes per load. */
68 const int8_t* complexVectorPtr = (int8_t*)complexVector;
69 int16_t* iBufferPtr = iBuffer;
70 int16_t* qBufferPtr = qBuffer;
/* Byte shuffle pattern applied to both loaded vectors; only its first
 * argument (15) is visible in this listing. */
72 __m256i MoveMask = _mm256_set_epi8(15,
105 __m256i iMove2, iMove1;
106 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One vector iteration reads 2 x 32 bytes = 16 points of two int16 each. */
108 unsigned int sixteenthPoints = num_points / 16;
110 for (number = 0; number < sixteenthPoints; number++) {
111 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
112 complexVectorPtr += 32;
113 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
114 complexVectorPtr += 32;
/* Reorder bytes inside each 128-bit lane (this shuffle never crosses lanes).
 * Presumably I words end up in the low 64 bits and Q words in the high
 * 64 bits of each lane - mask bytes not visible here, TODO confirm. */
116 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
117 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 moves 64-bit elements 0 and 2 of iMove1 into positions 0 and 1;
 * 0x80 moves elements 0 and 2 of iMove2 into positions 2 and 3.
 * permute2x128 then merges the two (its selector is elided in this listing). */
119 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
120 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same merge for 64-bit elements 1 and 3: 0x0d -> positions 0 and 1,
 * 0xd0 -> positions 2 and 3. */
122 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
123 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* NOTE(review): no advance of iBufferPtr/qBufferPtr is visible after these
 * stores; presumably it sits in the elided lines - verify. */
126 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
127 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the num_points % 16 points the vector loop skipped. */
133 number = sixteenthPoints * 16;
/* Continue from the byte cursor, now read as int16 elements. */
134 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
135 for (; number < num_points; number++) {
136 *iBufferPtr++ = *int16ComplexVectorPtr++;
137 *qBufferPtr++ = *int16ComplexVectorPtr++;
143 #include <tmmintrin.h>
148 unsigned int num_points)
/*
 * SSSE3 kernel, aligned variant: splits interleaved 16-bit samples into
 * iBuffer (first int16 of each point) and qBuffer (second int16), 8 points
 * per vector iteration, with a scalar loop for the remainder.
 *
 * _mm_load_si128 / _mm_store_si128 require 16-byte-aligned addresses.
 *
 * NOTE(review): the first lines of the signature and the braces are elided
 * in this listing - confirm against the full header.
 */
150 unsigned int number = 0;
/* Byte-granular read cursor so the vector loop can step 16 bytes per load. */
151 const int8_t* complexVectorPtr = (int8_t*)complexVector;
152 int16_t* iBufferPtr = iBuffer;
153 int16_t* qBufferPtr = qBuffer;
/* _mm_set_epi8 lists bytes from index 15 down to 0. Mask 1 gathers source
 * bytes 0,1,4,5,8,9,12,13 (the first int16 of each point) into the low
 * 8 bytes; 0x80 entries make the shuffle write zero to the other half. */
155 __m128i iMoveMask1 = _mm_set_epi8(
156 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
/* Same selection, placed in the high 8 bytes with the low half zeroed. */
157 __m128i iMoveMask2 = _mm_set_epi8(
158 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
/* Q masks select source bytes 2,3,6,7,10,11,14,15 (the second int16 of each
 * point), into the low half (mask 1) or the high half (mask 2). */
160 __m128i qMoveMask1 = _mm_set_epi8(
161 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
162 __m128i qMoveMask2 = _mm_set_epi8(
163 15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
165 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One vector iteration reads 2 x 16 bytes = 8 points of two int16 each. */
167 unsigned int eighthPoints = num_points / 8;
169 for (number = 0; number < eighthPoints; number++) {
170 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
171 complexVectorPtr += 16;
172 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
173 complexVectorPtr += 16;
/* The two shuffles fill disjoint halves with zeros elsewhere, so OR
 * concatenates them into one vector of 8 outputs. */
175 iOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, iMoveMask1),
176 _mm_shuffle_epi8(complexVal2, iMoveMask2));
177 qOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, qMoveMask1),
178 _mm_shuffle_epi8(complexVal2, qMoveMask2));
/* NOTE(review): no advance of iBufferPtr/qBufferPtr is visible after these
 * stores; presumably it sits in the elided lines - verify. */
180 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
181 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the num_points % 8 points the vector loop skipped. */
187 number = eighthPoints * 8;
/* Continue from the byte cursor, now read as int16 elements. */
188 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
189 for (; number < num_points; number++) {
190 *iBufferPtr++ = *int16ComplexVectorPtr++;
191 *qBufferPtr++ = *int16ComplexVectorPtr++;
197 #include <emmintrin.h>
202 unsigned int num_points)
/*
 * SSE2 kernel, aligned variant: splits interleaved 16-bit samples into
 * iBuffer (first int16 of each point) and qBuffer (second int16), 8 points
 * per vector iteration, with a scalar loop for the remainder.
 *
 * SSE2 has no byte shuffle, so the regrouping is done with 16-bit and 32-bit
 * shuffles followed by masking. _mm_load_si128 / _mm_store_si128 require
 * 16-byte-aligned addresses.
 *
 * NOTE(review): the first lines of the signature and the braces are elided
 * in this listing - confirm against the full header.
 */
204 unsigned int number = 0;
/* int16-granular read cursor: += 8 below steps one 16-byte vector. */
205 const int16_t* complexVectorPtr = (int16_t*)complexVector;
206 int16_t* iBufferPtr = iBuffer;
207 int16_t* qBufferPtr = qBuffer;
208 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
209 qComplexVal2, iOutputVal, qOutputVal;
/* _mm_set_epi32 lists dwords from index 3 down to 0: lowMask keeps the low
 * 64 bits, highMask keeps the high 64 bits. */
210 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
211 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
/* One vector iteration reads 2 x 8 int16 = 8 points. */
213 unsigned int eighthPoints = num_points / 8;
215 for (number = 0; number < eighthPoints; number++) {
216 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
217 complexVectorPtr += 8;
218 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
219 complexVectorPtr += 8;
/* I path, vector 1: (3,1,2,0) on each 4-word half turns I,Q,I,Q into
 * I,I,Q,Q; the dword shuffle (3,1,2,0) then gathers the four I words into
 * the low 64 bits. */
221 iComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
223 iComplexVal1 = _mm_shufflehi_epi16(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
225 iComplexVal1 = _mm_shuffle_epi32(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
/* I path, vector 2: same word shuffles, but dword shuffle (2,0,3,1) puts
 * the four I words into the high 64 bits instead. */
227 iComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
229 iComplexVal2 = _mm_shufflehi_epi16(iComplexVal2, _MM_SHUFFLE(3, 1, 2, 0));
231 iComplexVal2 = _mm_shuffle_epi32(iComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
/* Keep the I half of each shuffled vector and join them into 8 outputs. */
233 iOutputVal = _mm_or_si128(_mm_and_si128(iComplexVal1, lowMask),
234 _mm_and_si128(iComplexVal2, highMask));
236 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
/* Q path, vector 1: (2,0,3,1) on each 4-word half turns I,Q,I,Q into
 * Q,Q,I,I; dword shuffle (3,1,2,0) gathers the four Q words into the low
 * 64 bits. */
238 qComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(2, 0, 3, 1));
240 qComplexVal1 = _mm_shufflehi_epi16(qComplexVal1, _MM_SHUFFLE(2, 0, 3, 1));
242 qComplexVal1 = _mm_shuffle_epi32(qComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
/* Q path, vector 2: dword shuffle (2,0,3,1) puts the four Q words into the
 * high 64 bits. */
244 qComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
246 qComplexVal2 = _mm_shufflehi_epi16(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
248 qComplexVal2 = _mm_shuffle_epi32(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
/* Keep the Q half of each shuffled vector and join them into 8 outputs. */
250 qOutputVal = _mm_or_si128(_mm_and_si128(qComplexVal1, lowMask),
251 _mm_and_si128(qComplexVal2, highMask));
/* NOTE(review): no advance of iBufferPtr/qBufferPtr is visible in this loop;
 * presumably it sits in the elided lines - verify. */
253 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the num_points % 8 points the vector loop skipped. */
259 number = eighthPoints * 8;
260 for (; number < num_points; number++) {
261 *iBufferPtr++ = *complexVectorPtr++;
262 *qBufferPtr++ = *complexVectorPtr++;
267 #ifdef LV_HAVE_GENERIC
272 unsigned int num_points)
/*
 * Portable scalar kernel: for each of num_points points, copies the first
 * int16 of the interleaved input to iBuffer and the second to qBuffer.
 *
 * NOTE(review): the first lines of the signature, the declaration of
 * `number` and the braces are elided in this listing - confirm against the
 * full header.
 */
/* View the complex input as a flat int16 stream (two elements per point). */
274 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
275 int16_t* iBufferPtr = iBuffer;
276 int16_t* qBufferPtr = qBuffer;
278 for (number = 0; number < num_points; number++) {
/* Consecutive reads alternate between the I and Q destinations. */
279 *iBufferPtr++ = *complexVectorPtr++;
280 *qBufferPtr++ = *complexVectorPtr++;
/* External implementation: only declared here, defined outside this listing.
 * NOTE(review): presumably the ORC-compiled kernel, judging by the name -
 * confirm. The middle parameter lines are elided in this listing. */
287 extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer,
290 unsigned int num_points);
/* Thin wrapper: forwards iBuffer, qBuffer, complexVector and num_points
 * unchanged to volk_16ic_deinterleave_16i_x2_a_orc_impl. */
291 static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer,
294 unsigned int num_points)
296 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
303 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
304 #define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
306 #include <inttypes.h>
309 #include <immintrin.h>
/*
 * AVX2 kernel, unaligned variant: splits the interleaved 16-bit samples of
 * complexVector into iBuffer (first int16 of each point) and qBuffer (second
 * int16 of each point), exactly as the scalar tail loop at the bottom does.
 *
 * The vector loop uses _mm256_loadu_si256 / _mm256_storeu_si256, so the
 * buffers need no particular alignment.
 *
 * NOTE(review): this listing has gaps in its line numbering (the remaining
 * parameters, most MoveMask bytes, the permute2x128 selector and the closing
 * braces are not shown) - confirm details against the full header.
 */
311 static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
314 unsigned int num_points)
316 unsigned int number = 0;
/* Byte-granular read cursor so the vector loop can step 32 bytes per load. */
317 const int8_t* complexVectorPtr = (int8_t*)complexVector;
318 int16_t* iBufferPtr = iBuffer;
319 int16_t* qBufferPtr = qBuffer;
/* Byte shuffle pattern applied to both loaded vectors; only its first
 * argument (15) is visible in this listing. */
321 __m256i MoveMask = _mm256_set_epi8(15,
354 __m256i iMove2, iMove1;
355 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* One vector iteration reads 2 x 32 bytes = 16 points of two int16 each. */
357 unsigned int sixteenthPoints = num_points / 16;
359 for (number = 0; number < sixteenthPoints; number++) {
360 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
361 complexVectorPtr += 32;
362 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
363 complexVectorPtr += 32;
/* Reorder bytes inside each 128-bit lane (this shuffle never crosses lanes).
 * Presumably I words end up in the low 64 bits and Q words in the high
 * 64 bits of each lane - mask bytes not visible here, TODO confirm. */
365 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
366 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 moves 64-bit elements 0 and 2 of iMove1 into positions 0 and 1;
 * 0x80 moves elements 0 and 2 of iMove2 into positions 2 and 3.
 * permute2x128 then merges the two (its selector is elided in this listing). */
368 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
369 _mm256_permute4x64_epi64(iMove2, 0x80),
/* Same merge for 64-bit elements 1 and 3: 0x0d -> positions 0 and 1,
 * 0xd0 -> positions 2 and 3. */
371 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
372 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* NOTE(review): no advance of iBufferPtr/qBufferPtr is visible after these
 * stores; presumably it sits in the elided lines - verify. */
375 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
376 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the num_points % 16 points the vector loop skipped. */
382 number = sixteenthPoints * 16;
/* Continue from the byte cursor, now read as int16 elements. */
383 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
384 for (; number < num_points; number++) {
385 *iBufferPtr++ = *int16ComplexVectorPtr++;
386 *qBufferPtr++ = *int16ComplexVectorPtr++;
static void volk_16ic_deinterleave_16i_x2_generic(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:269
static void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:199
static void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t *iBuffer, int16_t *qBuffer, const lv_16sc_t *complexVector, unsigned int num_points)
Definition: volk_16ic_deinterleave_16i_x2.h:145
short complex lv_16sc_t
Definition: volk_complex.h:62