spandsp  0.0.6
fast_convert.h
1 /*
2  * SpanDSP - a series of DSP components for telephony
3  *
4  * fast_convert.h - Quick ways to convert floating point numbers to integers
5  *
6  * Written by Steve Underwood <steveu@coppice.org>
7  *
8  * Copyright (C) 2009 Steve Underwood
9  *
10  * All rights reserved.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser General Public License version 2.1,
14  * as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this program; if not, write to the Free Software
23  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24  */
25 
26 #if !defined(_SPANDSP_FAST_CONVERT_H_)
27 #define _SPANDSP_FAST_CONVERT_H_
28 
29 #if defined(__cplusplus)
30 extern "C"
31 {
32 #endif
33 
34 /* The following code, to handle issues with lrint() and lrintf() on various
35  * platforms, is adapted from similar code in libsndfile, which is:
36  *
37  * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
38  *
39  * This program is free software; you can redistribute it and/or modify
40  * it under the terms of the GNU Lesser General Public License as published by
41  * the Free Software Foundation; either version 2.1 of the License, or
42  * (at your option) any later version.
43  *
44  * This program is distributed in the hope that it will be useful,
45  * but WITHOUT ANY WARRANTY; without even the implied warranty of
46  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
47  * GNU Lesser General Public License for more details.
48  */
49 
50 /*
51  * On Intel Pentium processors (especially PIII and probably P4), converting
52  * from float to int is very slow. To meet the C specs, the code produced by
53  * most C compilers targeting Pentium needs to change the FPU rounding mode
54  * before the float to int conversion is performed.
55  *
56  * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
57  * is this flushing of the pipeline which is so slow.
58  *
59  * Fortunately the ISO C99 specification defines the functions lrint, lrintf,
60  * llrint and llrintf which fix this problem as a side effect.
61  *
62  * On Unix-like systems, the configure process should have detected the
63  * presence of these functions. If they weren't found we have to replace them
64  * here with a standard C cast.
65  */
66 
67 /*
68  * The C99 prototypes for these functions are as follows:
69  *
70  * float rintf(float x);
71  * double rint(double x);
72  * long int lrintf(float x);
73  * long int lrint(double x);
74  * long long int llrintf(float x);
75  * long long int llrint(double x);
76  *
77  * The presence of the required functions is detected during the configure
78  * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
79  * the config file.
80  */
81 
82 #if defined(__CYGWIN__)
83 #if !defined(__cplusplus) && (__GNUC__ < 4)
84  /*
85  * CYGWIN versions prior to 1.7.1 have lrint and lrintf functions, but
86  * they are slow and buggy:
87  * http://sourceware.org/ml/cygwin/2005-06/msg00153.html
88  * http://sourceware.org/ml/cygwin/2005-09/msg00047.html
89  * These replacement functions (pulled from the Public Domain MinGW
90  * math.h header) replace the native versions.
91  */
/* Replacement for the native Cygwin lrint(): converts a double to a long int
   with a single x87 store-and-pop instruction, so the value is rounded by the
   FPU rather than truncated. */
static __inline__ long int lrint(double x)
{
    long int rounded;

    /* The "t" constraint places x on top of the x87 stack. fistpl pops it
       after storing, which is why "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
106 
/* Replacement for the native Cygwin lrintf(): converts a float to a long int
   with a single x87 store-and-pop instruction, so the value is rounded by the
   FPU rather than truncated. */
static __inline__ long int lrintf(float x)
{
    long int rounded;

    /* The "t" constraint places x on top of the x87 stack. fistpl pops it
       after storing, which is why "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
120 #endif
121 
122  /* The fastest way to convert is the equivalent of lrint() */
/* Fast double to long int conversion for Cygwin, using a single x87
   store-and-pop instruction (the equivalent of lrint()).

   x       The value to convert.
   return  x rounded to an integer by the FPU. The result is limited to the
           range of a 32 bit integer. */
static __inline__ long int lfastrint(double x)
{
    /* fistpl always stores exactly 32 bits. The destination is therefore an
       int rather than a long int: where long int is 64 bits (x86_64 Cygwin),
       storing straight into a long int would leave its upper half
       uninitialised. Returning the int sign extends it correctly. */
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)
        : "st"      /* fistpl pops the x87 stack top */
    );

    return rounded;
}
137 
/* Fast float to long int conversion for Cygwin, using a single x87
   store-and-pop instruction (the equivalent of lrintf()).

   x       The value to convert.
   return  x rounded to an integer by the FPU. The result is limited to the
           range of a 32 bit integer. */
static __inline__ long int lfastrintf(float x)
{
    /* fistpl always stores exactly 32 bits. The destination is therefore an
       int rather than a long int: where long int is 64 bits (x86_64 Cygwin),
       storing straight into a long int would leave its upper half
       uninitialised. Returning the int sign extends it correctly. */
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)
        : "st"      /* fistpl pops the x87 stack top */
    );

    return rounded;
}
151 #elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)
152 
153 #if defined(__i386__)
154  /* These routines are guaranteed fast on an i386 machine. Using the built in
155  lrint() and lrintf() should be similar, but they may not always be enabled.
156  Sometimes, especially with "-O0", you might get slow calls to routines. */
/* Fast double to long int conversion for i386: one x87 store-and-pop
   instruction, avoiding any call into the maths library. */
static __inline__ long int lfastrint(double x)
{
    long int rounded;

    /* The "t" constraint places x on top of the x87 stack. fistpl pops it
       after storing, which is why "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
171 
/* Fast float to long int conversion for i386: one x87 store-and-pop
   instruction, avoiding any call into the maths library. */
static __inline__ long int lfastrintf(float x)
{
    long int rounded;

    /* The "t" constraint places x on top of the x87 stack. fistpl pops it
       after storing, which is why "st" is listed as clobbered. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
185 #elif defined(__x86_64__)
186  /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
187  double or float to an int. It looks like the design on the x86_64 took account
188  of the default behaviour specified for C. */
/* Fast double to long int conversion for x86_64: a plain C cast, which
   discards the fractional part (truncation toward zero). */
static __inline__ long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
193 
/* Fast float to long int conversion for x86_64: a plain C cast, which
   discards the fractional part (truncation toward zero). */
static __inline__ long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
198 #elif defined(__ppc__) || defined(__powerpc__)
199  static __inline__ long int lfastrint(register double x)
200  {
201  int res[2];
202 
203  __asm__ __volatile__
204  (
205  "fctiw %1, %1\n\t"
206  "stfd %1, %0"
207  : "=m" (res) /* Output */
208  : "f" (x) /* Input */
209  : "memory"
210  );
211 
212  return res[1];
213  }
214 
215  static __inline__ long int lfastrintf(register float x)
216  {
217  int res[2];
218 
219  __asm__ __volatile__
220  (
221  "fctiw %1, %1\n\t"
222  "stfd %1, %0"
223  : "=m" (res) /* Output */
224  : "f" (x) /* Input */
225  : "memory"
226  );
227 
228  return res[1];
229  }
230 #else
231  /* Fallback routines, for unrecognised platforms */
/* Fallback double to long int conversion for unrecognised platforms: a plain
   C cast, which discards the fractional part. */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
236 
/* Fallback float to long int conversion for unrecognised platforms: a plain
   C cast, which discards the fractional part. */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
241 #endif
242 
243 #elif defined(_M_IX86)
244  /* Visual Studio i386 */
245  /*
246  * Win32 doesn't seem to have the lrint() and lrintf() functions.
247  * Therefore implement inline versions of these functions here.
248  */
249 
250  __inline long int lrint(double x)
251  {
252  long int i;
253 
254  _asm
255  {
256  fld x
257  fistp i
258  };
259  return i;
260  }
261 
262  __inline long int lrintf(float x)
263  {
264  long int i;
265 
266  _asm
267  {
268  fld x
269  fistp i
270  };
271  return i;
272  }
273 
274  __inline float rintf(float flt)
275  {
276  _asm
277  { fld flt
278  frndint
279  }
280  }
281 
282  __inline double rint(double dbl)
283  {
284  _asm
285  {
286  fld dbl
287  frndint
288  }
289  }
290 
291  __inline long int lfastrint(double x)
292  {
293  long int i;
294 
295  _asm
296  {
297  fld x
298  fistp i
299  };
300  return i;
301  }
302 
303  __inline long int lfastrintf(float x)
304  {
305  long int i;
306 
307  _asm
308  {
309  fld x
310  fistp i
311  };
312  return i;
313  }
314 #elif defined(_M_X64)
315  /* Visual Studio x86_64 */
316  /* x86_64 machines will do best with a simple assignment. */
317 #include <intrin.h>
318 
/* lrint() for 64 bit Visual Studio builds, using the SSE2 scalar double to
   integer conversion.

   x       The value to convert.
   return  x converted to an integer by _mm_cvtsd_si64x(), then narrowed to
           long int. */
__inline long int lrint(double x)
{
    /* _mm_load_sd reads only the 8 bytes of x. The packed load _mm_loadu_pd
       would read 16 bytes, running past the end of the parameter. */
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
323 
/* lrintf() for 64 bit Visual Studio builds, using the SSE scalar float to
   integer conversion. */
__inline long int lrintf(float x)
{
    /* Load x into the low lane of an SSE register, then convert that lane. */
    const __m128 packed = _mm_load_ss(&x);

    return _mm_cvt_ss2si(packed);
}
328 
/* Fast double to long int conversion for 64 bit Visual Studio builds: a
   plain C cast, which discards the fractional part. */
__inline long int lfastrint(double x)
{
    const long int truncated = (long int) x;

    return truncated;
}
333 
/* Fast float to long int conversion for 64 bit Visual Studio builds: a
   plain C cast, which discards the fractional part. */
__inline long int lfastrintf(float x)
{
    const long int truncated = (long int) x;

    return truncated;
}
338 #elif defined(__MWERKS__) && defined(macintosh)
339  /* This MacOS 9 solution was provided by Stephane Letz */
340 
341  long int __inline__ lfastrint(register double x)
342  {
343  long int res[2];
344 
345  asm
346  {
347  fctiw x, x
348  stfd x, res
349  }
350  return res[1];
351  }
352 
353  long int __inline__ lfastrintf(register float x)
354  {
355  long int res[2];
356 
357  asm
358  {
359  fctiw x, x
360  stfd x, res
361  }
362  return res[1];
363  }
364 #elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
365  /* For Apple Mac OS/X - do recent versions still need this? */
366 
367  static __inline__ long int lfastrint(register double x)
368  {
369  int res[2];
370 
371  __asm__ __volatile__
372  (
373  "fctiw %1, %1\n\t"
374  "stfd %1, %0"
375  : "=m" (res) /* Output */
376  : "f" (x) /* Input */
377  : "memory"
378  );
379 
380  return res[1];
381  }
382 
383  static __inline__ long int lfastrintf(register float x)
384  {
385  int res[2];
386 
387  __asm__ __volatile__
388  (
389  "fctiw %1, %1\n\t"
390  "stfd %1, %0"
391  : "=m" (res) /* Output */
392  : "f" (x) /* Input */
393  : "memory"
394  );
395 
396  return res[1];
397  }
398 #else
399  /* There is nothing else to do, but use a simple casting operation, instead of a real
400  rint() type function. Since we are only trying to use rint() to speed up conversions,
401  the accuracy issues related to changing the rounding scheme are of little concern
402  to us. */
403 
404  #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
405  #warning "No usable lrint() and lrintf() functions available."
406  #warning "Replacing these functions with a simple C cast."
407  #endif
408 
/* Stand-in for lrint() where no usable version exists: a plain C cast, so
   the fractional part is discarded rather than rounded. */
static __inline__ long int lrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
413 
/* Stand-in for lrintf() where no usable version exists: a plain C cast, so
   the fractional part is discarded rather than rounded. */
static __inline__ long int lrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
418 
/* Generic double to long int conversion for platforms with no faster
   option: a plain C cast, which discards the fractional part. */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
423 
/* Generic float to long int conversion for platforms with no faster option:
   a plain C cast, which discards the fractional part. */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
428 #endif
429 
430 #if defined(__cplusplus)
431 }
432 #endif
433 
434 #endif
435 
436 /*- End of file ------------------------------------------------------------*/