• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/fe/yin.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /*
00003  * Copyright (c) 2008 Beyond Access, Inc.  All rights reserved.
00004  * 
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  * 
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  * 
00017  * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
00018  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00020  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL BEYOND ACCESS, INC.  NOR
00021  * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00022  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00023  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00024  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00025  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00026  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00027  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00028  */
00029 
00035 /* This implements part of the YIN algorithm:
00036  *
00037  * "YIN, a fundamental frequency estimator for speech and music".
00038  * Alain de Cheveigné and Hideki Kawahara.  Journal of the Acoustical
00039  * Society of America, 111 (4), April 2002.
00040  */
00041 
00042 #include "yin.h"
00043 #include "prim_type.h"
00044 #include "ckd_alloc.h"
00045 #include "fixpoint.h"
00046 
00047 #include <stdio.h>
00048 
00049 struct yin_s {
00050     uint16 frame_size;       
00051     uint16 search_threshold; 
00052     uint16 search_range;     
00053     uint16 nfr;              
00055     unsigned char wsize;    
00056     unsigned char wstart;   
00057     unsigned char wcur;     
00058     unsigned char endut;    
00060     fixed32 **diff_window;  
00061     uint16 *period_window;  
00062 };
00063 
00067 static void
00068 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
00069 {
00070     uint32 cum, cshift;
00071     int32 t, tscale;
00072 
00073     out_diff[0] = 32768;
00074     cum = 0;
00075     cshift = 0;
00076 
00077     /* Determine how many bits we can scale t up by below. */
00078     for (tscale = 0; tscale < 32; ++tscale)
00079         if (ndiff & (1<<(31-tscale)))
00080             break;
00081     --tscale; /* Avoid teh overflowz. */
00082     /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
00083        tscale, (ndiff-1) << tscale); */
00084 
00085     /* Somewhat elaborate block floating point implementation.
00086      * The fp implementation of this is really a lot simpler. */
00087     for (t = 1; t < ndiff; ++t) {
00088         uint32 dd, dshift, norm;
00089         int j;
00090 
00091         dd = 0;
00092         dshift = 0;
00093         for (j = 0; j < ndiff; ++j) {
00094             int diff = signal[j] - signal[t + j];
00095             /* Guard against overflows. */
00096             if (dd > (1UL<<tscale)) {
00097                 dd >>= 1;
00098                 ++dshift;
00099             }
00100             dd += (diff * diff) >> dshift;
00101         }
00102         /* Make sure the diffs and cum are shifted to the same
00103          * scaling factor (usually dshift will be zero) */
00104         if (dshift > cshift) {
00105             cum += dd << (dshift-cshift);
00106         }
00107         else {
00108             cum += dd >> (cshift-dshift);
00109         }
00110 
00111         /* Guard against overflows and also ensure that (t<<tscale) > cum. */
00112         while (cum > (1UL<<tscale)) {
00113             cum >>= 1;
00114             ++cshift;
00115         }
00116         /* Avoid divide-by-zero! */
00117         if (cum == 0) cum = 1;
00118         /* Calculate the normalizer in high precision. */
00119         norm = (t << tscale) / cum;
00120         /* Do a long multiply and shift down to Q15. */
00121         out_diff[t] = (int32)(((long long)dd * norm)
00122                               >> (tscale - 15 + cshift - dshift));
00123         /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
00124            dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
00125     }
00126 }
00127 
00128 yin_t *
00129 yin_init(int frame_size, float search_threshold,
00130          float search_range, int smooth_window)
00131 {
00132     yin_t *pe;
00133 
00134     pe = ckd_calloc(1, sizeof(*pe));
00135     pe->frame_size = frame_size;
00136     pe->search_threshold = (uint16)(search_threshold * 32768);
00137     pe->search_range = (uint16)(search_range * 32768);
00138     pe->wsize = smooth_window * 2 + 1;
00139     pe->diff_window = ckd_calloc_2d(pe->wsize,
00140                                     pe->frame_size / 2,
00141                                     sizeof(**pe->diff_window));
00142     pe->period_window = ckd_calloc(pe->wsize,
00143                                    sizeof(*pe->period_window));
00144     return pe;
00145 }
00146 
00147 void
00148 yin_free(yin_t *pe)
00149 {
00150     ckd_free_2d(pe->diff_window);
00151     ckd_free(pe->period_window);
00152     ckd_free(pe);
00153 }
00154 
00155 void
00156 yin_start(yin_t *pe)
00157 {
00158     /* Reset the circular window pointers. */
00159     pe->wstart = pe->endut = 0;
00160     pe->nfr = 0;
00161 }
00162 
00163 void
00164 yin_end(yin_t *pe)
00165 {
00166     pe->endut = 1;
00167 }
00168 
00169 int
00170 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
00171 {
00172     int i, min, argmin;
00173 
00174     min = INT_MAX;
00175     argmin = 0;
00176     for (i = start; i < end; ++i) {
00177         int diff = diff_window[i];
00178 
00179         if (diff < threshold) {
00180             min = diff;
00181             argmin = i;
00182             break;
00183         }
00184         if (diff < min) {
00185             min = diff;
00186             argmin = i;
00187         }
00188     }
00189     return argmin;
00190 }
00191 
00192 void
00193 yin_write(yin_t *pe, int16 const *frame)
00194 {
00195     int outptr, difflen;
00196 
00197     /* Rotate the window one frame forward. */
00198     ++pe->wstart;
00199     /* Fill in the frame before wstart. */
00200     outptr = pe->wstart - 1;
00201     /* Wrap around the window pointer. */
00202     if (pe->wstart == pe->wsize)
00203         pe->wstart = 0;
00204 
00205     /* Now calculate normalized difference function. */
00206     difflen = pe->frame_size / 2;
00207     cmn_diff(frame, pe->diff_window[outptr], difflen);
00208 
00209     /* Find the first point under threshold.  If not found, then
00210      * use the absolute minimum. */
00211     pe->period_window[outptr]
00212         = thresholded_search(pe->diff_window[outptr],
00213                              pe->search_threshold, 0, difflen);
00214 
00215     /* Increment total number of frames. */
00216     ++pe->nfr;
00217 }
00218 
00219 int
00220 yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff)
00221 {
00222     int wstart, wlen, half_wsize, i;
00223     int best, best_diff, search_width, low_period, high_period;
00224 
00225     half_wsize = (pe->wsize-1)/2;
00226     /* Without any smoothing, just return the current value (don't
00227      * need to do anything to the current poitner either). */
00228     if (half_wsize == 0) {
00229         if (pe->endut)
00230             return 0;
00231         *out_period = pe->period_window[0];
00232         *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
00233         return 1;
00234     }
00235 
00236     /* We can't do anything unless we have at least (wsize-1)/2 + 1
00237      * frames, unless we're at the end of the utterance. */
00238     if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
00239         /* Don't increment the current pointer either. */
00240         return 0;
00241     }
00242 
00243     /* Establish the smoothing window. */
00244     /* End of utterance. */
00245     if (pe->endut) {
00246         /* We are done (no more data) when pe->wcur = pe->wstart. */
00247         if (pe->wcur == pe->wstart)
00248             return 0;
00249         /* I.e. pe->wcur (circular minus) half_wsize. */
00250         wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
00251         /* Number of frames from wstart up to pe->wstart. */
00252         wlen = pe->wstart - wstart;
00253         if (wlen < 0) wlen += pe->wsize;
00254         /*printf("ENDUT! ");*/
00255     }
00256     /* Beginning of utterance. */
00257     else if (pe->nfr < pe->wsize) {
00258         wstart = 0;
00259         wlen = pe->nfr;
00260     }
00261     /* Normal case, it is what it is. */
00262     else {
00263         wstart = pe->wstart;
00264         wlen = pe->wsize;
00265     }
00266 
00267     /* Now (finally) look for the best local estimate. */
00268     /* printf("Searching for local estimate in %d frames around %d\n",
00269        wlen, pe->nfr + 1 - wlen); */
00270     best = pe->period_window[pe->wcur];
00271     best_diff = pe->diff_window[pe->wcur][best];
00272     for (i = 0; i < wlen; ++i) {
00273         int j = wstart + i;
00274         int diff;
00275 
00276         j %= pe->wsize;
00277         diff = pe->diff_window[j][pe->period_window[j]];
00278         /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
00279            pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
00280         if (diff < best_diff) {
00281             best_diff = diff;
00282             best = pe->period_window[j];
00283         }
00284     }
00285     /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
00286        best ? 8000.0/best : 8000.0); */
00287     /* If it's the same as the current one then return it. */
00288     if (best == pe->period_window[pe->wcur]) {
00289         /* Increment the current pointer. */
00290         if (++pe->wcur == pe->wsize)
00291             pe->wcur = 0;
00292         *out_period = best;
00293         *out_bestdiff = best_diff;
00294         return 1;
00295     }
00296     /* Otherwise, redo the search inside a narrower range. */
00297     search_width = best * pe->search_range / 32768;
00298     /* printf("Search width = %d * %.2f = %d\n",
00299        best, (double)pe->search_range/32768, search_width); */
00300     if (search_width == 0) search_width = 1;
00301     low_period = best - search_width;
00302     high_period = best + search_width;
00303     if (low_period < 0) low_period = 0;
00304     if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
00305     /* printf("Searching from %d to %d\n", low_period, high_period); */
00306     best = thresholded_search(pe->diff_window[pe->wcur],
00307                               pe->search_threshold,
00308                               low_period, high_period);
00309     best_diff = pe->diff_window[pe->wcur][best];
00310 
00311     if (out_period)
00312         *out_period = (best > 65535) ? 65535 : best;
00313     if (out_bestdiff)
00314         *out_bestdiff = (best_diff > 65535) ? 65535 : best_diff;
00315 
00316     /* Increment the current pointer. */
00317     if (++pe->wcur == pe->wsize)
00318         pe->wcur = 0;
00319     return 1;
00320 }

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1