• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/cont_ad.h

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2001 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * cont_ad.h -- Continuous A/D listening and silence filtering module.
00039  * 
00040  * **********************************************
00041  * CMU ARPA Speech Project
00042  *
00043  * Copyright (c) 1996 Carnegie Mellon University.
00044  * ALL RIGHTS RESERVED.
00045  * **********************************************
00046  * 
00047  * HISTORY
00048  * 
00049  * 13-Jul-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00050  *              Added spf and adbufsize to cont_ad_t in order to support variable
00051  *              frame sizes depending on audio sampling rate.
00052  * 
00053  * 30-Jun-98    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00054  *              Added FILE* argument to cont_ad_powhist_dump().
00055  * 
00056  * 16-Jan-98    Paul Placeway (pwp@cs.cmu.edu) at Carnegie Mellon University
00057  *              Changed to use dB instead of the weird power measure.
00058  *              Added most system parameters to cont_ad_t instead of hardwiring
00059  *              them in cont_ad.c.
00060  *              Added cont_ad_set_params() and cont_ad_get_params().
00061  * 
00062  * 28-Jul-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00063  *              Added cont_ad_t.siglvl.
00064  * 
00065  * 27-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00066  *              Added the option for cont_ad_read to return -1 on EOF.
00067  * 
00068  * 21-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00069  *              Added cont_ad_set_thresh().
00070  * 
00071  * 20-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00072  *              Separated thresholds for speech and silence.
00073  * 
00074  * 17-Jun-96    M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
00075  *              Created, based loosely on Steve Reed's original implementation.
00076  */
00077 
00078 
00079 #ifndef _CONT_AD_H_
00080 #define _CONT_AD_H_
00081 
00082 /* Win32/WinCE DLL gunk */
00083 #include <sphinxbase_export.h>
00084 #include <prim_type.h>
00085 
00113 #include <stdio.h>
00114 
00115 
00116 #ifdef __cplusplus
00117 extern "C" {
00118 #endif
00119 #if 0
00120 /* Fool Emacs. */
00121 }
00122 #endif
00123 
00124 /* States of continuous listening module */
/* NOTE(review): presumably these are the values held in cont_ad_t.state
 * (silence vs. speech) -- confirm against cont_ad.c. */
00125 #define CONT_AD_STATE_SIL       0
00126 #define CONT_AD_STATE_SPEECH    1
00127 
00128 
/*
 * Node of a singly linked list, chained through `next`.  cont_ad_t holds
 * pointers to this type in its spseg_head and spseg_tail members.
 * NOTE(review): the meanings of startfrm/nfrm below are inferred from their
 * names only -- confirm against cont_ad.c.
 */
00134 typedef struct spseg_s {
00135     int32 startfrm;     /* presumably the first frame of the segment -- TODO confirm */
00136     int32 nfrm;         /* presumably the number of frames in the segment -- TODO confirm */
00137     struct spseg_s *next;       /* next node in the list */
00138 } spseg_t;
00139 
00140 
/*
 * State object of the continuous A/D listening and silence filtering module.
 * Pointers to this type are returned by cont_ad_init() and
 * cont_ad_init_rawmode() and taken by the other cont_ad_* functions
 * declared in this header.
 *
 * Only state, read_ts and siglvl are meant to be read by clients (see the
 * comment inside the struct); everything else is internal.
 *
 * NOTE(review): the per-member notes marked "presumably" are inferred from
 * member names, matching parameter names elsewhere in this header, or the
 * HISTORY notes at the top of the file -- confirm against cont_ad.c.
 */
00150 typedef struct {
00151     /* Function to be called for obtaining A/D data (see prototype for ad_read in ad.h) */
00152     int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max);
00153     ad_rec_t *ad;       /* presumably the handle passed as first argument to adfunc -- TODO confirm */
00155     int32 rawmode;      /* presumably set by cont_ad_init_rawmode() -- TODO confirm */
00157     int16 *adbuf;       /* presumably a buffer of adbufsize samples -- TODO confirm */
00159     /* **************************************************************************
00160      * state, read_ts, and siglvl are provided for READ-ONLY use by client
00161      * applications, and are updated by calls to cont_ad_read() (see below).  All
00162      * other variables should be left alone.
00163      */
00164     int32 state;        /* presumably one of CONT_AD_STATE_SIL / CONT_AD_STATE_SPEECH -- TODO confirm */
00166     int32 read_ts;      
00170     int32 seglen;       
00174     int32 siglvl;       
00177     /* ************************************************************************ */
00178     
00179     int32 sps;          /* presumably samples per second -- TODO confirm */
00182     int32 eof;          /* presumably an end-of-input flag (cf. cont_ad_read returning -1 on EOF) -- TODO confirm */
00184     int32 spf;          /* added together with adbufsize to support variable frame sizes (see HISTORY) */
00185     int32 adbufsize;    /* added together with spf to support variable frame sizes (see HISTORY) */
00186     int32 prev_sample;  
00187     int32 headfrm;      
00188     int32 n_frm;        
00189     int32 n_sample;     
00190     int32 tot_frm;      
00191     int32 noise_level;  
00193     int32 *pow_hist;    
00194     char *frm_pow;      
00196     int32 auto_thresh;  
    /* The ten members from delta_sil through trailer, plus adapt_rate below,
     * carry the same names as the parameters of cont_ad_set_params() and
     * cont_ad_get_params(). */
00197     int32 delta_sil;    
00198     int32 delta_speech; 
00199     int32 min_noise;    
00200     int32 max_noise;    
00201     int32 winsize;      
00202     int32 speech_onset; 
00203     int32 sil_onset;    
00204     int32 leader;       
00205     int32 trailer;      
00207     int32 thresh_speech;/* speech and silence thresholds are kept separately (see HISTORY) */
00209     int32 thresh_sil;   
00211     int32 thresh_update;
00212     float32 adapt_rate; 
00216     int32 tail_state;   
00219     int32 win_startfrm; 
00220     int32 win_validfrm; 
00221     int32 n_other;      
00223     spseg_t *spseg_head;/* presumably head of the spseg_t linked list -- TODO confirm */
00224     spseg_t *spseg_tail;/* presumably tail of the spseg_t linked list -- TODO confirm */
00226     FILE *rawfp;        /* presumably the stream given to cont_ad_set_rawfp() -- TODO confirm */
00230     FILE *logfp;        /* presumably the stream given to cont_ad_set_logfp() -- TODO confirm */
00235     int32 n_calib_frame; 
00236 } cont_ad_t;
00237 
00238 
/*
 * Returns the cont_ad_t object pointer that the other cont_ad_* functions in
 * this header take as their first (or, for cont_ad_powhist_dump, second)
 * argument.
 *
 * ad:     A/D handle; same type as the first argument of adfunc.
 * adfunc: same type as the cont_ad_t.adfunc member, i.e. the function to be
 *         called for obtaining A/D data (see the prototype for ad_read in
 *         ad.h).
 *
 * NOTE(review): failure behaviour (e.g. a NULL return) is not visible in
 * this header -- confirm against cont_ad.c.
 */
00254 SPHINXBASE_EXPORT
00255 cont_ad_t *cont_ad_init (ad_rec_t *ad,  
00256                          int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max)
00260                          );
00261 
/*
 * Takes the same arguments and returns the same type as cont_ad_init().
 * NOTE(review): presumably differs from cont_ad_init() by setting the
 * cont_ad_t.rawmode member -- confirm against cont_ad.c.
 */
00268 SPHINXBASE_EXPORT
00269 cont_ad_t *cont_ad_init_rawmode (ad_rec_t *ad,
00270                                  int32 (*adfunc)(ad_rec_t *ad, int16 *buf, int32 max));
00271 
00272 
/*
 * Read through the continuous-listening object.  Calls to this function
 * update the client-readable members state, read_ts and siglvl of cont_ad_t
 * (see the comment inside that struct).
 *
 * r:   object pointer.
 * buf: destination buffer of int16 values.
 * max: NOTE(review): presumably the capacity of buf in samples -- confirm.
 *
 * Per the HISTORY notes at the top of this file, there is an option for this
 * function to return -1 on EOF.  NOTE(review): the meaning of other return
 * values is not visible in this header -- confirm against cont_ad.c.
 */
00301 SPHINXBASE_EXPORT
00302 int32 cont_ad_read (cont_ad_t *r,       
00303                     int16 *buf,         
00306                     int32 max           
00309         );
00310 
00314 SPHINXBASE_EXPORT
00315 int32 cont_ad_buffer_space(cont_ad_t *r);
00316 
00329 SPHINXBASE_EXPORT
00330 int32 cont_ad_calib (cont_ad_t *cont    
00331                      );
00332 
00344 SPHINXBASE_EXPORT
00345 int32 cont_ad_calib_loop (cont_ad_t *r, int16 *buf, int32 max); 
00346 
00358 SPHINXBASE_EXPORT
00359 int32 cont_ad_calib_size(cont_ad_t *r);
00360 
/*
 * Threshold setter added 21-Jun-96 (see HISTORY); speech and silence
 * thresholds are separate values.
 * NOTE(review): presumably `sil` and `sp` are the silence and speech
 * thresholds (cf. cont_ad_t.thresh_sil / thresh_speech); units and return
 * value are not visible in this header -- confirm against cont_ad.c.
 */
00373 SPHINXBASE_EXPORT
00374 int32 cont_ad_set_thresh (cont_ad_t *cont,      
00375                           int32 sil,    
00376                           int32 sp      
00377                           );
00378 
00379 
/*
 * Parameter setter added 16-Jan-98 (see HISTORY), when most system
 * parameters were moved into cont_ad_t instead of being hardwired in
 * cont_ad.c.  Every parameter after `r` has the same name and type as a
 * member of cont_ad_t.
 * NOTE(review): presumably each value is stored into the member of the same
 * name; validation rules and return value are not visible in this header --
 * confirm against cont_ad.c.
 */
00387 SPHINXBASE_EXPORT
00388 int32 cont_ad_set_params (cont_ad_t *r, int32 delta_sil, int32 delta_speech,
00389                           int32 min_noise, int32 max_noise,
00390                           int32 winsize, int32 speech_onset, int32 sil_onset,
00391                           int32 leader, int32 trailer,
00392                           float32 adapt_rate);
00393 
/*
 * Counterpart of cont_ad_set_params(): takes one pointer per parameter, in
 * the same order and with the same names.
 * NOTE(review): presumably each pointer receives the current value of the
 * cont_ad_t member of the same name; whether NULL pointers are accepted and
 * what the return value means are not visible in this header -- confirm
 * against cont_ad.c.
 */
00401 SPHINXBASE_EXPORT
00402 int32 cont_ad_get_params (cont_ad_t *r, int32 *delta_sil, int32 *delta_speech,
00403                           int32 *min_noise, int32 *max_noise,
00404                           int32 *winsize, int32 *speech_onset, int32 *sil_onset,
00405                           int32 *leader, int32 *trailer,
00406                           float32 *adapt_rate);
00407 
00412 SPHINXBASE_EXPORT
00413 int32 cont_ad_reset (cont_ad_t *cont);  /* In: Object pointer from cont_ad_init */
00414 
00415 
00419 SPHINXBASE_EXPORT
00420 int32 cont_ad_close (cont_ad_t *cont);  /* In: Object pointer from cont_ad_init */
00421 
00422 
/*
 * The FILE* argument was added 30-Jun-98 (see HISTORY).
 * NOTE(review): presumably writes the contents of cont->pow_hist to fp --
 * confirm against cont_ad.c.
 */
00426 SPHINXBASE_EXPORT
00427 void cont_ad_powhist_dump (FILE *fp, cont_ad_t *cont);
00428 
00429 
00434 SPHINXBASE_EXPORT
00435 int32 cont_ad_detach (cont_ad_t *c);
00436 
00437 
/*
 * `a` and `func` have the same types as the cont_ad_t members ad and adfunc.
 * NOTE(review): presumably installs them on `c` as its A/D source and read
 * function (the reverse of cont_ad_detach) -- confirm against cont_ad.c.
 */
00443 SPHINXBASE_EXPORT
00444 int32 cont_ad_attach (cont_ad_t *c, ad_rec_t *a, int32 (*func)(ad_rec_t *, int16 *, int32));
00445 
00446 
00458 SPHINXBASE_EXPORT
00459 int32 cont_ad_set_rawfp (cont_ad_t *c,  /* The cont_ad object being addressed */
00460                          FILE *fp);     /* File to which raw audio data is to
00461                                            be dumped; NULL to stop dumping. */
00462 
00470 SPHINXBASE_EXPORT
00471 int32 cont_ad_set_logfp (cont_ad_t *c,  /* The cont_ad object being addressed */
00472                          FILE *fp);     /* File to which logs are written;
00473                                            NULL to stop logging. */
00474 
/*
 * Same parameter types and order as cont_ad_set_thresh() (object, silence
 * value, speech value), but without the cont_ad_ prefix used by every other
 * function in this header.
 * NOTE(review): looks like an alternate or legacy name for
 * cont_ad_set_thresh() -- confirm the relationship against cont_ad.c before
 * relying on either one.
 */
00483 SPHINXBASE_EXPORT
00484 int32 cont_set_thresh(cont_ad_t *r, int32 silence, int32 speech);
00485 
00486 #ifdef __cplusplus
00487 }
00488 #endif
00489 
00490 
00491 #endif

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1