SphinxBase  0.6
feat.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * feat.h -- Cepstral features computation.
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1999 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  * $Log$
49  * Revision 1.1 2006/04/05 20:27:30 dhdfu
50  * A Great Reorganization of header files and executables
51  *
52  * Revision 1.17 2006/02/23 03:59:40 arthchan2003
53  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc.
54  *
55  * Revision 1.16.4.1 2005/07/05 06:25:08 arthchan2003
56  * Fixed dox-doc.
57  *
58  * Revision 1.16 2005/06/22 03:29:35 arthchan2003
59  * Makefile.am s for all subdirectory of libs3decoder/
60  *
61  * Revision 1.5 2005/06/13 04:02:56 archan
62  * Fixed most doxygen-style documentation under libs3decoder.
63  *
64  * Revision 1.4 2005/04/21 23:50:26 archan
65  * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used.
66  *
67  * Revision 1.3 2005/03/30 01:22:46 archan
68  * Fixed mistakes in last updates. Add
69  *
70  *
71  * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu)
72  * Adding feat_free() to free allocated memory
73  *
74  * 04-Jan-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University
75  * Started.
76  */
77 
78 
79 #ifndef _S3_FEAT_H_
80 #define _S3_FEAT_H_
81 
82 #include <stdio.h>
83 
84 /* Win32/WinCE DLL gunk */
85 #include <sphinxbase/sphinxbase_export.h>
86 #include <sphinxbase/prim_type.h>
87 #include <sphinxbase/fe.h>
88 #include <sphinxbase/cmn.h>
89 #include <sphinxbase/agc.h>
90 
91 #ifdef __cplusplus
92 extern "C" {
93 #endif
94 #if 0
95 /* Fool Emacs. */
96 }
97 #endif
98 
/**
 * Size constant for the live feature buffer.
 * NOTE(review): meaning and unit are inferred from the name only -- confirm
 * against the implementation.
 */
#define LIVEBUFBLOCKSIZE 256

/**
 * Upper bound on frames.
 * NOTE(review): presumably the per-utterance maximum -- confirm against users.
 */
#define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */
105 
/**
 * Command-line option entries for cepstra-to-feature conversion.
 *
 * Expands to ten comma-separated, brace-enclosed initializers of the form
 * { option name, argument type, default value string (or NULL), help text },
 * with no trailing comma, so it can be spliced into an option-definition
 * array.  ARG_STRING, ARG_INT32, ARG_FLOAT32 and ARG_BOOLEAN are not defined
 * in this header and must be in scope where the macro is expanded.
 */
#define cepstral_to_feature_command_line_macro() \
    { "-feat", \
      ARG_STRING, \
      "1s_c_d_dd", \
      "Feature stream type, depends on the acoustic model" }, \
    { "-ceplen", \
      ARG_INT32, \
      "13", \
      "Number of components in the input feature vector" }, \
    { "-cmn", \
      ARG_STRING, \
      "current", \
      "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \
    { "-cmninit", \
      ARG_STRING, \
      "8.0", \
      "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \
    { "-varnorm", \
      ARG_BOOLEAN, \
      "no", \
      "Variance normalize each utterance (only if CMN == current)" }, \
    { "-agc", \
      ARG_STRING, \
      "none", \
      "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \
    { "-agcthresh", \
      ARG_FLOAT32, \
      "2.0", \
      "Initial threshold for automatic gain control" }, \
    { "-lda", \
      ARG_STRING, \
      NULL, \
      "File containing transformation matrix to be applied to features (single-stream features only)" }, \
    { "-ldadim", \
      ARG_INT32, \
      "0", \
      "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \
    { "-svspec", \
      ARG_STRING, \
      NULL, \
      "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)" }
147 
155 typedef struct feat_s {
156  int refcount;
157  char *name;
158  int32 cepsize;
159  int32 n_stream;
160  uint32 *stream_len;
161  int32 window_size;
163  int32 n_sv;
164  uint32 *sv_len;
165  int32 **subvecs;
166  mfcc_t *sv_buf;
167  int32 sv_dim;
170  int32 varnorm;
172  agc_type_t agc;
186  void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat);
187  cmn_t *cmn_struct;
189  agc_t *agc_struct;
192  mfcc_t **cepbuf;
193  mfcc_t **tmpcepbuf;
194  int32 bufpos;
195  int32 curpos;
197  mfcc_t ***lda;
198  uint32 n_lda;
199  uint32 out_dim;
200 } feat_t;
201 
/** Feature type name: the `name` member of the feat_t that `f` points to. */
#define feat_name(f) ((f)->name)
206 
/** Input vector size: the `cepsize` member of the feat_t that `f` points to. */
#define feat_cepsize(f) ((f)->cepsize)
210 
/** The `window_size` member of the feat_t that `f` points to. */
#define feat_window_size(f) ((f)->window_size)
214 
/** Number of feature streams: the `n_stream` member of the feat_t that `f` points to. */
#define feat_n_stream(f) ((f)->n_stream)
220 
/** Length of stream `i`: element `i` of the `stream_len` array of `f`.
 *  No bounds check is performed on `i`. */
#define feat_stream_len(f,i) ((f)->stream_len[i])
226 
/**
 * First dimension of the feature array: `n_sv` when it is non-zero,
 * otherwise `n_stream`.
 *
 * Every use of `f` is parenthesized so that arguments such as `&fcb` expand
 * correctly; note that `f` is still evaluated more than once.
 */
#define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : (f)->n_stream)
230 
/**
 * Second dimension of the feature array for index `i`:
 * `out_dim` when `lda` is set; otherwise `sv_len[i]` when `sv_len` is set;
 * otherwise `stream_len[i]`.
 *
 * Every use of `f` is parenthesized so that arguments such as `&fcb` expand
 * correctly; note that `f` is still evaluated more than once.
 */
#define feat_dimension2(f,i) \
    ((f)->lda ? (f)->out_dim : ((f)->sv_len ? (f)->sv_len[i] : (f)->stream_len[i]))
234 
/** Output dimensionality: the `out_dim` member of the feat_t that `f` points to. */
#define feat_dimension(f) ((f)->out_dim)
238 
/**
 * Pointer to the array of second-dimension lengths:
 * the address of `out_dim` when `lda` is set; otherwise `sv_len` when it is
 * set; otherwise `stream_len`.
 *
 * Every use of `f` is parenthesized so that arguments such as `&fcb` expand
 * correctly; note that `f` is still evaluated more than once.
 */
#define feat_stream_lengths(f) \
    ((f)->lda ? (&(f)->out_dim) : (f)->sv_len ? (f)->sv_len : (f)->stream_len)
242 
265 SPHINXBASE_EXPORT
266 int32 **parse_subvecs(char const *str);
267 
271 SPHINXBASE_EXPORT
272 void subvecs_free(int32 **subvecs);
273 
274 
287 SPHINXBASE_EXPORT
288 mfcc_t ***feat_array_alloc(feat_t *fcb,
290  int32 nfr
291  );
292 
296 SPHINXBASE_EXPORT
297 mfcc_t ***feat_array_realloc(feat_t *fcb,
299  mfcc_t ***old_feat,
300  int32 ofr,
301  int32 nfr
302  );
303 
307 SPHINXBASE_EXPORT
308 void feat_array_free(mfcc_t ***feat);
309 
310 
326 SPHINXBASE_EXPORT
327 feat_t *feat_init(char const *type,
328  cmn_type_t cmn,
331  int32 varnorm,
334  agc_type_t agc,
336  int32 breport,
337  int32 cepsize
340  );
341 
346 SPHINXBASE_EXPORT
347 int32 feat_read_lda(feat_t *feat,
348  const char *ldafile,
349  int32 dim
350  );
351 
355 SPHINXBASE_EXPORT
356 void feat_lda_transform(feat_t *fcb,
357  mfcc_t ***inout_feat,
358  uint32 nfr
359  );
360 
379 SPHINXBASE_EXPORT
380 int feat_set_subvecs(feat_t *fcb, int32 **subvecs);
381 
385 SPHINXBASE_EXPORT
386 void feat_print(feat_t *fcb,
387  mfcc_t ***feat,
388  int32 nfr,
389  FILE *fp
390  );
391 
392 
409 SPHINXBASE_EXPORT
410 int32 feat_s2mfc2feat(feat_t *fcb,
411  const char *file,
412  const char *dir,
414  const char *cepext,
417  int32 sf, int32 ef, /* Start/End frames
418  within file to be read. Use
419  0,-1 to process entire
420  file */
421  mfcc_t ***feat,
423  int32 maxfr
427  );
428 
429 
458 SPHINXBASE_EXPORT
459 int32 feat_s2mfc2feat_live(feat_t *fcb,
460  mfcc_t **uttcep,
461  int32 *inout_ncep,
463  int32 beginutt,
464  int32 endutt,
465  mfcc_t ***ofeat
468  );
469 
470 
476 SPHINXBASE_EXPORT
478 
484 SPHINXBASE_EXPORT
485 int feat_free(feat_t *f
486  );
487 
491 SPHINXBASE_EXPORT
492 void feat_report(feat_t *f
493  );
494 #ifdef __cplusplus
495 }
496 #endif
497 
498 
499 #endif