GNU Radio 3.4.0 C++ API
volk_8ic_s32f_deinterleave_real_32f_a16.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
00002 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #if LV_HAVE_SSE4_1
00008 #include <smmintrin.h>
00009 /*!
00010   \brief Deinterleaves the complex 8 bit vector into I float vector data
00011   \param complexVector The complex input vector
00012   \param iBuffer The I buffer output data
00013   \param scalar The scaling value being multiplied against each data point
00014   \param num_points The number of complex data values to be deinterleaved
00015 */
00016 static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse4_1(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00017   float* iBufferPtr = iBuffer;
00018 
00019   unsigned int number = 0;
00020   const unsigned int eighthPoints = num_points / 8;    
00021   __m128 iFloatValue;
00022 
00023   const float iScalar= 1.0 / scalar;
00024   __m128 invScalar = _mm_set_ps1(iScalar);
00025   __m128i complexVal, iIntVal;
00026   int8_t* complexVectorPtr = (int8_t*)complexVector;
00027 
00028   __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
00029 
00030   for(;number < eighthPoints; number++){
00031     complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr += 16;
00032     complexVal = _mm_shuffle_epi8(complexVal, moveMask);
00033 
00034     iIntVal = _mm_cvtepi8_epi32(complexVal);
00035     iFloatValue = _mm_cvtepi32_ps(iIntVal);
00036 
00037     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00038 
00039     _mm_store_ps(iBufferPtr, iFloatValue);
00040 
00041     iBufferPtr += 4;
00042 
00043     complexVal = _mm_srli_si128(complexVal, 4);
00044     iIntVal = _mm_cvtepi8_epi32(complexVal);
00045     iFloatValue = _mm_cvtepi32_ps(iIntVal);
00046 
00047     iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
00048 
00049     _mm_store_ps(iBufferPtr, iFloatValue);
00050 
00051     iBufferPtr += 4;
00052   }
00053 
00054   number = eighthPoints * 8;
00055   for(; number < num_points; number++){
00056     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
00057     complexVectorPtr++;
00058   }
00059     
00060 }
00061 #endif /* LV_HAVE_SSE4_1 */
00062 
00063 
00064 #if LV_HAVE_SSE
00065 #include <xmmintrin.h>
00066 /*!
00067   \brief Deinterleaves the complex 8 bit vector into I float vector data
00068   \param complexVector The complex input vector
00069   \param iBuffer The I buffer output data
00070   \param scalar The scaling value being multiplied against each data point
00071   \param num_points The number of complex data values to be deinterleaved
00072 */
00073 static inline void volk_8ic_s32f_deinterleave_real_32f_a16_sse(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00074   float* iBufferPtr = iBuffer;
00075 
00076   unsigned int number = 0;
00077   const unsigned int quarterPoints = num_points / 4;    
00078   __m128 iValue;
00079 
00080   const float iScalar= 1.0 / scalar;
00081   __m128 invScalar = _mm_set_ps1(iScalar);
00082   int8_t* complexVectorPtr = (int8_t*)complexVector;
00083 
00084   float floatBuffer[4] __attribute__((aligned(128)));
00085 
00086   for(;number < quarterPoints; number++){
00087     floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00088     floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00089     floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
00090     floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2; 
00091 
00092     iValue = _mm_load_ps(floatBuffer);
00093 
00094     iValue = _mm_mul_ps(iValue, invScalar);
00095 
00096     _mm_store_ps(iBufferPtr, iValue);
00097 
00098     iBufferPtr += 4;
00099   }
00100 
00101   number = quarterPoints * 4;
00102   for(; number < num_points; number++){
00103     *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
00104     complexVectorPtr++;
00105   }
00106     
00107 }
00108 #endif /* LV_HAVE_SSE */
00109 
00110 #if LV_HAVE_GENERIC
00111 /*!
00112   \brief Deinterleaves the complex 8 bit vector into I float vector data
00113   \param complexVector The complex input vector
00114   \param iBuffer The I buffer output data
00115   \param scalar The scaling value being multiplied against each data point
00116   \param num_points The number of complex data values to be deinterleaved
00117 */
00118 static inline void volk_8ic_s32f_deinterleave_real_32f_a16_generic(float* iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
00119   unsigned int number = 0;
00120   const int8_t* complexVectorPtr = (const int8_t*)complexVector;
00121   float* iBufferPtr = iBuffer;
00122   const float invScalar = 1.0 / scalar;
00123   for(number = 0; number < num_points; number++){
00124     *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
00125     complexVectorPtr++;
00126   }
00127 }
00128 #endif /* LV_HAVE_GENERIC */
00129 
00130 
00131 
00132 
00133 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a16_H */