• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

include/fe.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights 
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 /*
00039  * fe.h
00040  * 
00041  * $Log: fe.h,v $
00042  * Revision 1.11  2005/02/05 02:15:02  egouvea
00043  * Removed fe_process(), never used
00044  *
00045  * Revision 1.10  2004/12/10 16:48:55  rkm
00046  * Added continuous density acoustic model handling
00047  *
00048  *
00049  */
00050 
/* On WIN32 builds (except GNUWINCE) map srand48()/lrand48() onto srand()/rand().
 * These definitions sit before the include guard, so they are re-evaluated on
 * every inclusion; the redefinitions are identical, which C permits. */
00051 #if defined(WIN32) && !defined(GNUWINCE)
00052 #define srand48(x) srand(x)
00053 #define lrand48() rand()
00054 #endif
00055 
/* Include guard; the matching #endif is the last directive of this header.
 * NOTE(review): a leading underscore followed by an uppercase letter is a
 * reserved identifier form in C; renaming would be cleaner, but confirm first
 * that nothing else tests this macro. */
00056 #ifndef _NEW_FE_H_
00057 #define _NEW_FE_H_
00058 
00059 /* Win32/WinCE DLL gunk */
00060 #include <sphinxbase_export.h>
00061 
00062 #include <sphinx_config.h>
00063 #include <cmd_ln.h>
00064 #include <fixpoint.h>
00065 
/* Give C++ consumers C linkage for the declarations that follow; the block is
 * closed under the same __cplusplus test at the end of the header. */
00066 #ifdef __cplusplus
00067 extern "C" {
00068 #endif
00069 #if 0
00070 /* Never compiled: this brace only balances the one above so that Emacs brace matching and indentation are not confused. */
00071 }
00072 #endif
00073 
/* Byte order of the build target as a string, "big" when WORDS_BIGENDIAN is
 * defined and "little" otherwise.  Used below as the default value of the
 * -input_endian option. */
00074 #ifdef WORDS_BIGENDIAN
00075 #define NATIVE_ENDIAN "big"
00076 #else
00077 #define NATIVE_ENDIAN "little"
00078 #endif
00079 
/* Built-in defaults for the front-end options listed in
 * waveform_to_cepstral_command_line_macro().  Units mentioned below are
 * reviewer assumptions; this header does not state them. */

/* Default for -samprate ("Sampling rate"); presumably Hz -- confirm. */
00081 #define DEFAULT_SAMPLING_RATE 16000
00082 
/* Default for -frate ("Frame rate"); presumably frames per second -- confirm. */
00083 #define DEFAULT_FRAME_RATE 100
00084 
/* Not referenced by the option table in this header.  Numerically equal to
 * DEFAULT_SAMPLING_RATE / DEFAULT_FRAME_RATE (16000 / 100); presumably the
 * frame shift in samples -- confirm. */
00086 #define DEFAULT_FRAME_SHIFT 160
00087 
/* Default for -wlen ("Hamming window length"); presumably seconds, which at
 * the default sampling rate is 0.025625 * 16000 = 410 samples -- confirm. */
00088 #define DEFAULT_WINDOW_LENGTH 0.025625 
00089 
/* Default for -nfft ("Size of FFT"). */
00090 #define DEFAULT_FFT_SIZE 512
00091 
/* Default for -ncep ("Number of cep coefficients"). */
00092 #define DEFAULT_NUM_CEPSTRA 13
00093 
/* Default for -nfilt ("Number of filter banks"). */
00094 #define DEFAULT_NUM_FILTERS 40
00095 
/* Default for -lowerf ("Lower edge of filters"); presumably Hz -- confirm. */
00096 #define DEFAULT_LOWER_FILT_FREQ 133.33334
00097 
/* Default for -upperf ("Upper edge of filters"); presumably Hz -- confirm. */
00098 #define DEFAULT_UPPER_FILT_FREQ 6855.4976
00099 
/* Default for -alpha ("Preemphasis parameter"). */
00100 #define DEFAULT_PRE_EMPHASIS_ALPHA 0.97
00101 
/* Default for -warp_type ("Warping function type (or shape)"). */
00102 #define DEFAULT_WARP_TYPE "inverse_linear"
00103 
/* Default for -seed; per that option's help text a negative value means the
 * library picks its own seed.  Deliberately left unparenthesized because it is
 * passed through ARG_STRINGIFY to build the default string.
 * NOTE(review): assuming ARG_STRINGIFY stringifies the macro expansion,
 * adding parentheses would change the default from "-1" to "(-1)" -- confirm
 * against cmd_ln.h before touching this. */
00104 #define SEED  -1
00105 
/*
 * Option-table fragment for the waveform-to-cepstrum front end.
 *
 * Expands to a comma-separated sequence of brace-enclosed entries of the form
 *   { option name, ARG_* type tag, default value as a string (or NULL), help text }
 * Numeric defaults are built by passing the DEFAULT_* macros and SEED through
 * ARG_STRINGIFY; boolean defaults are the strings "yes" / "no"; -warp_params
 * has a NULL default.
 *
 * The last entry ("-verbose") has no trailing comma, so the expansion site
 * must supply its own comma before any further entries or terminator.
 *
 * NOTE(review): presumably intended for use inside an arg_t array initializer
 * (compare fe_get_args()); arg_t and the ARG_* tags are declared outside this
 * header -- confirm in cmd_ln.h.
 */
00106 #define waveform_to_cepstral_command_line_macro() \
00107   { "-logspec", \
00108     ARG_BOOLEAN, \
00109     "no", \
00110     "Write out logspectral files instead of cepstra" }, \
00111    \
00112   { "-smoothspec", \
00113     ARG_BOOLEAN, \
00114     "no", \
00115     "Write out cepstral-smoothed logspectral files" }, \
00116    \
00117   { "-transform", \
00118     ARG_STRING, \
00119     "legacy", \
00120     "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \
00121    \
00122   { "-spec2cep", \
00123     ARG_BOOLEAN, \
00124     "no", \
00125     "Input is log spectral files, output is cepstral files" }, \
00126    \
00127   { "-cep2spec", \
00128     ARG_BOOLEAN, \
00129     "no", \
00130     "Input is cepstral files, output is log spectral files" }, \
00131    \
00132   { "-alpha", \
00133     ARG_FLOAT32, \
00134     ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \
00135     "Preemphasis parameter" }, \
00136    \
00137   { "-samprate", \
00138     ARG_FLOAT32, \
00139     ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \
00140     "Sampling rate" }, \
00141    \
00142   { "-frate", \
00143     ARG_INT32, \
00144     ARG_STRINGIFY(DEFAULT_FRAME_RATE), \
00145     "Frame rate" }, \
00146    \
00147   { "-wlen", \
00148     ARG_FLOAT32, \
00149     ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \
00150     "Hamming window length" }, \
00151    \
00152   { "-nfft", \
00153     ARG_INT32, \
00154     ARG_STRINGIFY(DEFAULT_FFT_SIZE), \
00155     "Size of FFT" }, \
00156    \
00157   { "-nfilt", \
00158     ARG_INT32, \
00159     ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \
00160     "Number of filter banks" }, \
00161    \
00162   { "-lowerf", \
00163     ARG_FLOAT32, \
00164     ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \
00165     "Lower edge of filters" }, \
00166    \
00167   { "-upperf", \
00168     ARG_FLOAT32, \
00169     ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \
00170     "Upper edge of filters" }, \
00171    \
00172   { "-unit_area", \
00173     ARG_BOOLEAN, \
00174     "yes", \
00175     "Normalize mel filters to unit area" }, \
00176    \
00177   { "-round_filters", \
00178     ARG_BOOLEAN, \
00179     "yes", \
00180     "Round mel filter frequencies to DFT points" }, \
00181    \
00182   { "-ncep", \
00183     ARG_INT32, \
00184     ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \
00185     "Number of cep coefficients" }, \
00186    \
00187   { "-doublebw", \
00188     ARG_BOOLEAN, \
00189     "no", \
00190     "Use double bandwidth filters (same center freq)" }, \
00191    \
00192   { "-lifter", \
00193     ARG_INT32, \
00194     "0", \
00195     "Length of sin-curve for liftering, or 0 for no liftering." }, \
00196    \
00197   { "-input_endian", \
00198     ARG_STRING, \
00199     NATIVE_ENDIAN, \
00200     "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \
00201    \
00202   { "-warp_type", \
00203     ARG_STRING, \
00204     DEFAULT_WARP_TYPE, \
00205     "Warping function type (or shape)" }, \
00206  \
00207   { "-warp_params", \
00208     ARG_STRING, \
00209     NULL, \
00210     "Parameters defining the warping function" }, \
00211  \
00212   { "-dither", \
00213     ARG_BOOLEAN, \
00214     "no", \
00215     "Add 1/2-bit noise" }, \
00216    \
00217   { "-seed", \
00218     ARG_INT32, \
00219     ARG_STRINGIFY(SEED), \
00220     "Seed for random number generator; if less than zero, pick our own" }, \
00221  \
00222   { "-remove_dc", \
00223     ARG_BOOLEAN, \
00224     "no", \
00225     "Remove DC offset from each frame" }, \
00226    \
00227   { "-verbose", \
00228     ARG_BOOLEAN, \
00229     "no", \
00230     "Show input filenames" }
00231   
/* Feature value type (mfcc_t) and its arithmetic helpers, selected at compile
 * time by FIXED_POINT.  Both branches expose the same five names so client
 * code can be written once. */
00232 #ifdef FIXED_POINT
00233 
/* Fixed-point build: mfcc_t is fixed32.  fixed32 and the FIX* helpers are
 * declared outside this header (presumably fixpoint.h -- confirm). */
00234 typedef fixed32 mfcc_t;
00235 
/* Convert a floating-point value to mfcc_t. */
00237 #define FLOAT2MFCC(x) FLOAT2FIX(x)
00238 
/* Convert an mfcc_t value to floating point. */
00239 #define MFCC2FLOAT(x) FIX2FLOAT(x)
00240 
/* Multiply two mfcc_t values. */
00241 #define MFCCMUL(a,b) FIXMUL(a,b)
/* Forwards x, in and out unchanged to FIXLN_ANY; presumably the fixed-point
 * counterpart of log(), with in/out interpreted by FIXLN_ANY -- confirm. */
00242 #define MFCCLN(x,in,out) FIXLN_ANY(x,in,out)
00243 #else /* !FIXED_POINT */
00244 
/* Floating-point build: mfcc_t is float32 and the conversions are identities. */
00246 typedef float32 mfcc_t;
/* Identity: the value is already in mfcc_t representation. */
00248 #define FLOAT2MFCC(x) (x)
00249 
/* Identity: mfcc_t is already floating point. */
00250 #define MFCC2FLOAT(x) (x)
00251 
/* Plain multiplication with both operands parenthesized. */
00252 #define MFCCMUL(a,b) ((a)*(b))
/* Calls log(x); the in/out arguments are accepted only to match the
 * fixed-point signature and are discarded.
 * NOTE(review): log() needs <math.h>, which this header does not include
 * directly -- confirm that an included header or the user provides it. */
00253 #define MFCCLN(x,in,out) log(x)
00254 #endif /* !FIXED_POINT */
00255 
/* Opaque front-end handle: struct fe_s is only forward-declared here, so
 * clients can hold and pass fe_t pointers but cannot access its members. */
00259 typedef struct fe_s fe_t;
00260 
/* Front-end status codes: zero means success and every failure has its own
 * negative value.
 * NOTE(review): the int-returning functions below presumably return these
 * values; the prototypes alone do not establish that -- confirm. */
00264 enum fe_error_e {
00265         FE_SUCCESS = 0,
00266         FE_OUTPUT_FILE_SUCCESS  = 0, /* alias: same value as FE_SUCCESS */
00267         FE_CONTROL_FILE_ERROR = -1,
00268         FE_START_ERROR = -2,
00269         FE_UNKNOWN_SINGLE_OR_BATCH = -3,
00270         FE_INPUT_FILE_OPEN_ERROR = -4,
00271         FE_INPUT_FILE_READ_ERROR = -5,
00272         FE_MEM_ALLOC_ERROR = -6,
00273         FE_OUTPUT_FILE_WRITE_ERROR = -7,
00274         FE_OUTPUT_FILE_OPEN_ERROR = -8,
00275         FE_ZERO_ENERGY_ERROR = -9,
00276         FE_INVALID_PARAM_ERROR =  -10
00277 };
00278 
/* Create a front-end object without an explicit configuration argument.
 * Returns a pointer to the new fe_t.
 * NOTE(review): presumably configured from global command-line state, and
 * presumably returns NULL on failure -- neither is visible here; confirm in
 * the implementation. */
00286 SPHINXBASE_EXPORT
00287 fe_t* fe_init_auto(void);
00288 
/* Return a pointer to a read-only arg_t table (the pointee is const, so
 * callers must not modify it).
 * NOTE(review): presumably the option definitions accepted by the front end,
 * i.e. the entries of waveform_to_cepstral_command_line_macro() -- confirm. */
00296 SPHINXBASE_EXPORT
00297 arg_t const *fe_get_args(void);
00298 
/* Create a front-end object from the given configuration object.
 *   config  parsed options (cmd_ln_t, declared in cmd_ln.h).
 * Returns a pointer to the new fe_t.
 * NOTE(review): whether the fe_t takes ownership of config, and the failure
 * return value, are not visible here -- confirm before freeing config. */
00309 SPHINXBASE_EXPORT
00310 fe_t *fe_init_auto_r(cmd_ln_t *config);
00311 
/* Return the cmd_ln_t associated with the front end fe.
 * NOTE(review): presumably the same object passed to fe_init_auto_r() and
 * still owned by fe, so callers should not free it -- confirm. */
00319 SPHINXBASE_EXPORT
00320 cmd_ln_t *fe_get_config(fe_t *fe);
00321 
/* Begin processing a new utterance on fe.
 * NOTE(review): presumably returns 0 on success and a negative
 * enum fe_error_e value on failure -- confirm. */
00326 SPHINXBASE_EXPORT
00327 int fe_start_utt(fe_t *fe);
00328 
/* Return the output size of fe as an int.
 * NOTE(review): presumably the number of mfcc_t values in one output frame
 * (what the out_cep / fr_cep buffers must hold) -- confirm. */
00341 SPHINXBASE_EXPORT
00342 int fe_get_output_size(fe_t *fe);
00343 
/* Report the input framing of fe through two output parameters.
 *   out_frame_shift  receives the frame shift.
 *   out_frame_size   receives the frame size.
 * NOTE(review): both values are presumably counts of int16 samples, and it is
 * not visible here whether NULL is accepted for either pointer -- confirm. */
00356 SPHINXBASE_EXPORT
00357 void fe_get_input_size(fe_t *fe, int *out_frame_shift,
00358                        int *out_frame_size);
00359 
/* Finish the current utterance on fe.
 *   out_cepvector  buffer of mfcc_t that may receive a final feature vector.
 *   out_nframes    receives the number of frames written.
 * NOTE(review): presumably flushes leftover samples into at most one padded
 * frame and returns 0 / negative fe_error_e; required buffer size is
 * presumably fe_get_output_size() values -- confirm. */
00374 SPHINXBASE_EXPORT
00375 int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes);
00376 
/* Retain the front end fe and return a fe_t pointer.
 * NOTE(review): presumably increments a reference count and returns fe itself;
 * each retain would then need a matching fe_free() -- confirm. */
00382 SPHINXBASE_EXPORT
00383 fe_t *fe_retain(fe_t *fe);
00384 
/* Release the front end fe.
 * NOTE(review): given fe_retain(), this presumably drops one reference and
 * returns the remaining count (0 once actually destroyed); NULL handling is
 * not visible here -- confirm. */
00392 SPHINXBASE_EXPORT
00393 int fe_free(fe_t *fe);
00394 
/* Process a single frame of audio.
 *   spch     input samples (read-only int16 data).
 *   nsamps   number of samples available at spch.
 *   out_cep  buffer receiving the mfcc_t features for the frame.
 * NOTE(review): out_cep presumably needs room for fe_get_output_size() values;
 * return is presumably 0 / negative fe_error_e -- confirm. */
00403 SPHINXBASE_EXPORT
00404 int fe_process_frame(fe_t *fe, int16 const *spch,
00405                      int32 nsamps, mfcc_t *out_cep);
00406 
/* Process a block of audio into feature frames.
 * The "inout_" parameters are passed by pointer so the function can update
 * them:
 *   inout_spch     in: start of the int16 sample data; presumably advanced
 *                  past the consumed samples on return.
 *   inout_nsamps   in: samples available; presumably the number left
 *                  unconsumed on return.
 *   buf_cep        array of frame buffers receiving mfcc_t features.
 *   inout_nframes  in: presumably the capacity of buf_cep in frames;
 *                  out: presumably the number of frames produced.
 * NOTE(review): the in/out semantics above are inferred from the parameter
 * names and types only -- confirm against the implementation. */
00453 SPHINXBASE_EXPORT
00454 int fe_process_frames(fe_t *fe,
00455                       int16 const **inout_spch,
00456                       size_t *inout_nsamps,
00457                       mfcc_t **buf_cep,
00458                       int32 *inout_nframes);
00459 
/* Process a block of samples and hand back a block of feature frames.
 *   spch       input samples (read-only int16 data).
 *   nsamps     number of samples at spch.
 *   cep_block  receives a pointer to a 2-D mfcc_t array.
 *   nframes    receives the number of frames in *cep_block.
 * NOTE(review): *cep_block is presumably allocated by this call and must be
 * released with fe_free_2d() -- confirm ownership before relying on it. */
00475 SPHINXBASE_EXPORT
00476 int fe_process_utt(fe_t *fe,  
00477                    int16 const *spch, 
00478                    size_t nsamps, 
00479                    mfcc_t ***cep_block, 
00480                    int32 *nframes 
00481         );
00482 
/* Free a 2-D array given as an untyped pointer.
 * NOTE(review): presumably the counterpart for the block returned through
 * fe_process_utt()'s cep_block -- confirm. */
00486 SPHINXBASE_EXPORT
00487 void fe_free_2d(void *arr);
00488 
/* Convert nframes frames of mfcc_t features (input) to float32 (output).
 * NOTE(review): the per-frame width is presumably fe_get_output_size();
 * whether input and output may alias is not visible here -- confirm. */
00492 SPHINXBASE_EXPORT
00493 int fe_mfcc_to_float(fe_t *fe,
00494                      mfcc_t **input,
00495                      float32 **output,
00496                      int32 nframes);
00497 
/* Convert nframes frames of float32 features (input) to mfcc_t (output).
 * NOTE(review): the per-frame width is presumably fe_get_output_size();
 * whether input and output may alias is not visible here -- confirm. */
00501 SPHINXBASE_EXPORT
00502 int fe_float_to_mfcc(fe_t *fe,
00503                      float32 **input,
00504                      mfcc_t **output,
00505                      int32 nframes);
00506 
/* Convert one frame of log-spectral values to cepstra.
 *   fr_spec  input frame (read-only).
 *   fr_cep   output frame.
 * NOTE(review): frame lengths are presumably the configured -nfilt and -ncep
 * values, and the exact transform variant is not visible here -- confirm. */
00530 SPHINXBASE_EXPORT
00531 int fe_logspec_to_mfcc(fe_t *fe,  
00532                          const mfcc_t *fr_spec, 
00533                          mfcc_t *fr_cep 
00534         );
00535 
/* Convert one frame of log-spectral values to cepstra; the name indicates a
 * DCT-II.
 *   fr_spec  input frame (read-only).
 *   fr_cep   output frame.
 * NOTE(review): how this differs from fe_logspec_to_mfcc() (e.g. scaling) is
 * not visible in this header -- confirm. */
00544 SPHINXBASE_EXPORT
00545 int fe_logspec_dct2(fe_t *fe,  
00546                       const mfcc_t *fr_spec, 
00547                       mfcc_t *fr_cep 
00548         );
00549 
/* Convert one frame of cepstra back to log-spectral values; the name
 * indicates a DCT-III.
 *   fr_cep   input frame (read-only).
 *   fr_spec  output frame.
 * NOTE(review): presumably the inverse of fe_logspec_dct2() -- confirm. */
00558 SPHINXBASE_EXPORT
00559 int fe_mfcc_dct3(fe_t *fe,  
00560                  const mfcc_t *fr_cep, 
00561                  mfcc_t *fr_spec 
00562         );
00563 
/* Close the extern "C" block opened near the top of this header. */
00564 #ifdef __cplusplus
00565 }
00566 #endif
00567 
00568 
00569 #endif /* _NEW_FE_H_ */

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1