71 #ifndef INCLUDED_volk_32f_x2_min_32f_a_H
72 #define INCLUDED_volk_32f_x2_min_32f_a_H
78 #include <xmmintrin.h>
/*!
 * \brief Element-wise minimum of two float vectors (SSE, aligned access).
 *
 * For every i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i].
 *
 * \param cVector    Output buffer; must hold at least num_points floats.
 * \param aVector    First input buffer; must hold at least num_points floats.
 * \param bVector    Second input buffer; must hold at least num_points floats.
 * \param num_points Number of elements to process; 0 leaves cVector untouched.
 *
 * \note The vector loop uses _mm_load_ps / _mm_store_ps, which require the
 *       accessed addresses to be 16-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number = 0;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: four floats per iteration. */
    for (; number < quarterPoints; number++) {
        const __m128 aVal = _mm_load_ps(aPtr);
        const __m128 bVal = _mm_load_ps(bPtr);
        const __m128 cVal = _mm_min_ps(aVal, bVal);

        _mm_store_ps(cPtr, cVal);

        /* Step to the next group of four; without this the loop would keep
         * reprocessing the first group and the tail would start at index 0. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the 0..3 elements that do not fill a whole vector. */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
117 #include <arm_neon.h>
/*!
 * \brief Element-wise minimum of two float vectors (ARM NEON).
 *
 * For every i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i].
 *
 * \param cVector    Output buffer; must hold at least num_points floats.
 * \param aVector    First input buffer; must hold at least num_points floats.
 * \param bVector    Second input buffer; must hold at least num_points floats.
 * \param num_points Number of elements to process; 0 leaves cVector untouched.
 *
 * \note NOTE(review): the vector loop uses vminq_f32 while the tail uses a
 *       plain `a < b` comparison; whether both treat NaN inputs the same way
 *       should be confirmed against the NEON reference.
 */
static inline void volk_32f_x2_min_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    const unsigned int quarter_points = num_points / 4;
    unsigned int number;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    /* Main loop: four floats per iteration. */
    for (number = 0; number < quarter_points; number++) {
        const float32x4_t a_vec = vld1q_f32(aPtr);
        const float32x4_t b_vec = vld1q_f32(bPtr);
        const float32x4_t c_vec = vminq_f32(a_vec, b_vec);

        vst1q_f32(cPtr, c_vec);

        /* Step to the next group of four elements. */
        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail: the 0..3 elements that do not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
152 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Element-wise minimum of two float vectors (portable C).
 *
 * For every i in [0, num_points) stores aVector[i] into cVector[i] when
 * aVector[i] < bVector[i], and bVector[i] otherwise. Because the selection is
 * a single `<` comparison, bVector[i] is also the result whenever that
 * comparison is false for any other reason (equal values, NaN operands).
 *
 * \param cVector    Output buffer; must hold at least num_points floats.
 * \param aVector    First input buffer; must hold at least num_points floats.
 * \param bVector    Second input buffer; must hold at least num_points floats.
 * \param num_points Number of elements to process; 0 leaves cVector untouched.
 */
static inline void volk_32f_x2_min_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const float a = *aPtr++;
        const float b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
/*
 * Prototype of the implementation the wrapper below forwards to. Its
 * definition is not part of this header (presumably generated by ORC --
 * confirm against the build).
 */
extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*!
 * \brief Thin wrapper that forwards to volk_32f_x2_min_32f_a_orc_impl.
 *
 * All four arguments are passed through unchanged; this function adds no
 * logic of its own.
 *
 * \param cVector    Output buffer handed to the implementation.
 * \param aVector    First input buffer handed to the implementation.
 * \param bVector    Second input buffer handed to the implementation.
 * \param num_points Element count handed to the implementation.
 */
static inline void volk_32f_x2_min_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
190 #include <immintrin.h>
193 const float* aVector,
194 const float* bVector,
195 unsigned int num_points)
197 unsigned int number = 0;
198 const unsigned int eighthPoints = num_points / 8;
200 float* cPtr = cVector;
201 const float* aPtr = aVector;
202 const float* bPtr = bVector;
204 __m256 aVal, bVal, cVal;
205 for (; number < eighthPoints; number++) {
206 aVal = _mm256_load_ps(aPtr);
207 bVal = _mm256_load_ps(bPtr);
209 cVal = _mm256_min_ps(aVal, bVal);
211 _mm256_store_ps(cPtr, cVal);
218 number = eighthPoints * 8;
219 for (; number < num_points; number++) {
220 const float a = *aPtr++;
221 const float b = *bPtr++;
222 *cPtr++ = (a < b ? a : b);
227 #ifdef LV_HAVE_AVX512F
228 #include <immintrin.h>
230 static inline void volk_32f_x2_min_32f_a_avx512f(
float* cVector,
231 const float* aVector,
232 const float* bVector,
233 unsigned int num_points)
235 unsigned int number = 0;
236 const unsigned int sixteenthPoints = num_points / 16;
238 float* cPtr = cVector;
239 const float* aPtr = aVector;
240 const float* bPtr = bVector;
242 __m512 aVal, bVal, cVal;
243 for (; number < sixteenthPoints; number++) {
244 aVal = _mm512_load_ps(aPtr);
245 bVal = _mm512_load_ps(bPtr);
247 cVal = _mm512_min_ps(aVal, bVal);
249 _mm512_store_ps(cPtr, cVal);
256 number = sixteenthPoints * 16;
257 for (; number < num_points; number++) {
258 const float a = *aPtr++;
259 const float b = *bPtr++;
260 *cPtr++ = (a < b ? a : b);
268 #ifndef INCLUDED_volk_32f_x2_min_32f_u_H
269 #define INCLUDED_volk_32f_x2_min_32f_u_H
271 #include <inttypes.h>
274 #ifdef LV_HAVE_AVX512F
275 #include <immintrin.h>
277 static inline void volk_32f_x2_min_32f_u_avx512f(
float* cVector,
278 const float* aVector,
279 const float* bVector,
280 unsigned int num_points)
282 unsigned int number = 0;
283 const unsigned int sixteenthPoints = num_points / 16;
285 float* cPtr = cVector;
286 const float* aPtr = aVector;
287 const float* bPtr = bVector;
289 __m512 aVal, bVal, cVal;
290 for (; number < sixteenthPoints; number++) {
291 aVal = _mm512_loadu_ps(aPtr);
292 bVal = _mm512_loadu_ps(bPtr);
294 cVal = _mm512_min_ps(aVal, bVal);
296 _mm512_storeu_ps(cPtr, cVal);
303 number = sixteenthPoints * 16;
304 for (; number < num_points; number++) {
305 const float a = *aPtr++;
306 const float b = *bPtr++;
307 *cPtr++ = (a < b ? a : b);
313 #include <immintrin.h>
316 const float* aVector,
317 const float* bVector,
318 unsigned int num_points)
320 unsigned int number = 0;
321 const unsigned int eighthPoints = num_points / 8;
323 float* cPtr = cVector;
324 const float* aPtr = aVector;
325 const float* bPtr = bVector;
327 __m256 aVal, bVal, cVal;
328 for (; number < eighthPoints; number++) {
329 aVal = _mm256_loadu_ps(aPtr);
330 bVal = _mm256_loadu_ps(bPtr);
332 cVal = _mm256_min_ps(aVal, bVal);
334 _mm256_storeu_ps(cPtr, cVal);
341 number = eighthPoints * 8;
342 for (; number < num_points; number++) {
343 const float a = *aPtr++;
344 const float b = *bPtr++;
345 *cPtr++ = (a < b ? a : b);
static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:80
static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:119
static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:154
static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:315
static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:192