SphinxBase 0.6
|
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ 00002 /* ==================================================================== 00003 * Copyright (c) 1999-2004 Carnegie Mellon University. All rights 00004 * reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions 00008 * are met: 00009 * 00010 * 1. Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * 00013 * 2. Redistributions in binary form must reproduce the above copyright 00014 * notice, this list of conditions and the following disclaimer in 00015 * the documentation and/or other materials provided with the 00016 * distribution. 00017 * 00018 * This work was supported in part by funding from the Defense Advanced 00019 * Research Projects Agency and the National Science Foundation of the 00020 * United States of America, and the CMU Sphinx Speech Consortium. 00021 * 00022 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 00023 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 00024 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00025 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY 00026 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00027 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00028 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00029 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00030 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00031 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00032 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00033 * 00034 * ==================================================================== 00035 * 00036 */ 00037 /* 00038 * feat.c -- Feature vector description and cepstra->feature computation. 00039 * 00040 * ********************************************** 00041 * CMU ARPA Speech Project 00042 * 00043 * Copyright (c) 1996 Carnegie Mellon University. 00044 * ALL RIGHTS RESERVED. 00045 * ********************************************** 00046 * 00047 * HISTORY 00048 * $Log$ 00049 * Revision 1.22 2006/02/23 03:59:40 arthchan2003 00050 * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc. 00051 * 00052 * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003 00053 * Free stuffs in cmn and feat corectly. 00054 * 00055 * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003 00056 * Add message to show the directory which the feature is searched for. 00057 * 00058 * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003 00059 * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point. 00060 * 00061 * Revision 1.21 2005/06/22 03:29:35 arthchan2003 00062 * Makefile.am s for all subdirectory of libs3decoder/ 00063 * 00064 * Revision 1.4 2005/04/21 23:50:26 archan 00065 * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. 00066 * 00067 * Revision 1.3 2005/03/30 01:22:46 archan 00068 * Fixed mistakes in last updates. 
Add 00069 * 00070 * 00071 * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) 00072 * Adding feat_free() to free allocated memory 00073 * 00074 * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University 00075 * Modified feat_s2mfc2feat_block() to handle empty buffers at 00076 * the end of an utterance 00077 * 00078 * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University 00079 * Added feat_s2mfc2feat_block() to allow feature computation 00080 * from sequences of blocks of cepstral vectors 00081 * 00082 * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00083 * Major changes to accommodate arbitrary feature input types. Added 00084 * feat_read(), moved various cep2feat functions from other files into 00085 * this one. Also, made this module object-oriented with the feat_t type. 00086 * Changed definition of s2mfc_read to let the caller manage MFC buffers. 00087 * 00088 * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00089 * Added unistd.h include. 00090 * 00091 * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00092 * Added check for sf argument to s2mfc_read being within file size. 00093 * 00094 * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00095 * Added sf, ef parameters to s2mfc_read(). 00096 * 00097 * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00098 * Added feat_cepsize(). 00099 * Added different feature-handling (s2_4x, s3_1x39 at this point). 00100 * Moved feature-dependent functions to feature-dependent files. 00101 * 00102 * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00103 * Moved constant declarations from feat.h into here. 00104 * 00105 * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University 00106 * Created. 00107 */ 00108 00109 00110 /* 00111 * This module encapsulates different feature streams used by the Sphinx group. 
New 00112 * stream types can be added by augmenting feat_init() and providing an accompanying 00113 * compute_feat function. It also provides a "generic" feature vector definition for 00114 * handling "arbitrary" speech input feature types (see the last section in feat_init()). 00115 * In this case the speech input data should already be feature vectors; no computation, 00116 * such as MFC->feature conversion, is available or needed. 00117 */ 00118 00119 #include <assert.h> 00120 #include <string.h> 00121 #ifdef HAVE_CONFIG_H 00122 #include <config.h> 00123 #endif 00124 00125 #ifdef _MSC_VER 00126 #pragma warning (disable: 4244 4996) 00127 #endif 00128 00129 #include "sphinxbase/fe.h" 00130 #include "sphinxbase/feat.h" 00131 #include "sphinxbase/bio.h" 00132 #include "sphinxbase/pio.h" 00133 #include "sphinxbase/cmn.h" 00134 #include "sphinxbase/agc.h" 00135 #include "sphinxbase/err.h" 00136 #include "sphinxbase/ckd_alloc.h" 00137 #include "sphinxbase/prim_type.h" 00138 #include "sphinxbase/glist.h" 00139 00140 #define FEAT_VERSION "1.0" 00141 #define FEAT_DCEP_WIN 2 00142 00143 #ifdef DUMP_FEATURES 00144 static void 00145 cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text) 00146 { 00147 int32 i, j; 00148 00149 E_INFO("%s\n", text); 00150 for (i = 0; i < nfr; i++) { 00151 for (j = 0; j < fcb->cepsize; j++) { 00152 fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j])); 00153 } 00154 fprintf(stderr, "\n"); 00155 } 00156 } 00157 static void 00158 feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text) 00159 { 00160 E_INFO("%s\n", text); 00161 feat_print(fcb, feat, nfr, stderr); 00162 } 00163 #else /* !DUMP_FEATURES */ 00164 #define cep_dump_dbg(fcb,mfc,nfr,text) 00165 #define feat_print_dbg(fcb,mfc,nfr,text) 00166 #endif 00167 00168 int32 ** 00169 parse_subvecs(char const *str) 00170 { 00171 char const *strp; 00172 int32 n, n2, l; 00173 glist_t dimlist; /* List of dimensions in one subvector */ 00174 glist_t veclist; /* List of dimlists 
(subvectors) */ 00175 int32 **subvec; 00176 gnode_t *gn, *gn2; 00177 00178 veclist = NULL; 00179 00180 strp = str; 00181 for (;;) { 00182 dimlist = NULL; 00183 00184 for (;;) { 00185 if (sscanf(strp, "%d%n", &n, &l) != 1) 00186 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, 00187 strp - str); 00188 strp += l; 00189 00190 if (*strp == '-') { 00191 strp++; 00192 00193 if (sscanf(strp, "%d%n", &n2, &l) != 1) 00194 E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, 00195 strp - str); 00196 strp += l; 00197 } 00198 else 00199 n2 = n; 00200 00201 if ((n < 0) || (n > n2)) 00202 E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str, 00203 strp - str); 00204 00205 for (; n <= n2; n++) { 00206 gnode_t *gn; 00207 for (gn = dimlist; gn; gn = gnode_next(gn)) 00208 if (gnode_int32(gn) == n) 00209 break; 00210 if (gn != NULL) 00211 E_FATAL("'%s': Duplicate dimension ending @pos %d\n", 00212 str, strp - str); 00213 00214 dimlist = glist_add_int32(dimlist, n); 00215 } 00216 00217 if ((*strp == '\0') || (*strp == '/')) 00218 break; 00219 00220 if (*strp != ',') 00221 E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str); 00222 00223 strp++; 00224 } 00225 00226 veclist = glist_add_ptr(veclist, (void *) dimlist); 00227 00228 if (*strp == '\0') 00229 break; 00230 00231 assert(*strp == '/'); 00232 strp++; 00233 } 00234 00235 /* Convert the glists to arrays; remember the glists are in reverse order of the input! 
*/ 00236 n = glist_count(veclist); /* #Subvectors */ 00237 subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */ 00238 subvec[n] = NULL; /* sentinel */ 00239 00240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) { 00241 gn2 = (glist_t) gnode_ptr(gn); 00242 00243 n2 = glist_count(gn2); /* Length of this subvector */ 00244 if (n2 <= 0) 00245 E_FATAL("'%s': 0-length subvector\n", str); 00246 00247 subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */ 00248 subvec[n][n2] = -1; /* sentinel */ 00249 00250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2) 00251 subvec[n][n2] = gnode_int32(gn2); 00252 assert((n2 < 0) && (!gn2)); 00253 } 00254 assert((n < 0) && (!gn)); 00255 00256 /* Free the glists */ 00257 for (gn = veclist; gn; gn = gnode_next(gn)) { 00258 gn2 = (glist_t) gnode_ptr(gn); 00259 glist_free(gn2); 00260 } 00261 glist_free(veclist); 00262 00263 return subvec; 00264 } 00265 00266 void 00267 subvecs_free(int32 **subvecs) 00268 { 00269 int32 **sv; 00270 00271 for (sv = subvecs; sv && *sv; ++sv) 00272 ckd_free(*sv); 00273 ckd_free(subvecs); 00274 } 00275 00276 int 00277 feat_set_subvecs(feat_t *fcb, int32 **subvecs) 00278 { 00279 int32 **sv; 00280 int32 n_sv, n_dim, i; 00281 00282 if (subvecs == NULL) { 00283 subvecs_free(fcb->subvecs); 00284 ckd_free(fcb->sv_buf); 00285 ckd_free(fcb->sv_len); 00286 fcb->n_sv = 0; 00287 fcb->subvecs = NULL; 00288 fcb->sv_len = NULL; 00289 fcb->sv_buf = NULL; 00290 fcb->sv_dim = 0; 00291 return 0; 00292 } 00293 00294 if (fcb->n_stream != 1) { 00295 E_ERROR("Subvector specifications require single-stream features!"); 00296 return -1; 00297 } 00298 00299 n_sv = 0; 00300 n_dim = 0; 00301 for (sv = subvecs; sv && *sv; ++sv) { 00302 int32 *d; 00303 00304 for (d = *sv; d && *d != -1; ++d) { 00305 ++n_dim; 00306 } 00307 ++n_sv; 00308 } 00309 if (n_dim > feat_dimension(fcb)) { 00310 E_ERROR("Total dimensionality of subvector specification %d " 00311 "> feature 
dimensionality %d\n", n_dim, feat_dimension(fcb)); 00312 return -1; 00313 } 00314 00315 fcb->n_sv = n_sv; 00316 fcb->subvecs = subvecs; 00317 fcb->sv_len = ckd_calloc(n_sv, sizeof(*fcb->sv_len)); 00318 fcb->sv_buf = ckd_calloc(n_dim, sizeof(*fcb->sv_buf)); 00319 fcb->sv_dim = n_dim; 00320 for (i = 0; i < n_sv; ++i) { 00321 int32 *d; 00322 for (d = subvecs[i]; d && *d != -1; ++d) { 00323 ++fcb->sv_len[i]; 00324 } 00325 } 00326 00327 return 0; 00328 } 00329 00333 static void 00334 feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) 00335 { 00336 uint32 i; 00337 00338 if (fcb->subvecs == NULL) 00339 return; 00340 for (i = 0; i < nfr; ++i) { 00341 mfcc_t *out; 00342 int32 j; 00343 00344 out = fcb->sv_buf; 00345 for (j = 0; j < fcb->n_sv; ++j) { 00346 int32 *d; 00347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) { 00348 *out++ = inout_feat[i][0][*d]; 00349 } 00350 } 00351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf)); 00352 } 00353 } 00354 00355 mfcc_t *** 00356 feat_array_alloc(feat_t * fcb, int32 nfr) 00357 { 00358 int32 i, j, k; 00359 mfcc_t *data, *d, ***feat; 00360 00361 assert(fcb); 00362 assert(nfr > 0); 00363 assert(feat_dimension(fcb) > 0); 00364 00365 /* Make sure to use the dimensionality of the features *before* 00366 LDA and subvector projection. 
*/ 00367 k = 0; 00368 for (i = 0; i < fcb->n_stream; ++i) 00369 k += fcb->stream_len[i]; 00370 assert(k >= feat_dimension(fcb)); 00371 assert(k >= fcb->sv_dim); 00372 00373 feat = 00374 (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *)); 00375 data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t)); 00376 00377 for (i = 0; i < nfr; i++) { 00378 d = data + i * k; 00379 for (j = 0; j < feat_dimension1(fcb); j++) { 00380 feat[i][j] = d; 00381 d += feat_dimension2(fcb, j); 00382 } 00383 } 00384 00385 return feat; 00386 } 00387 00388 void 00389 feat_array_free(mfcc_t ***feat) 00390 { 00391 ckd_free(feat[0][0]); 00392 ckd_free_2d((void **)feat); 00393 } 00394 00395 static void 00396 feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00397 { 00398 mfcc_t *f; 00399 mfcc_t *w, *_w; 00400 mfcc_t *w1, *w_1, *_w1, *_w_1; 00401 mfcc_t d1, d2; 00402 int32 i, j; 00403 00404 assert(fcb); 00405 assert(feat_cepsize(fcb) == 13); 00406 assert(feat_n_stream(fcb) == 4); 00407 assert(feat_stream_len(fcb, 0) == 12); 00408 assert(feat_stream_len(fcb, 1) == 24); 00409 assert(feat_stream_len(fcb, 2) == 3); 00410 assert(feat_stream_len(fcb, 3) == 12); 00411 assert(feat_window_size(fcb) == 4); 00412 00413 /* CEP; skip C0 */ 00414 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); 00415 00416 /* 00417 * DCEP(SHORT): mfc[2] - mfc[-2] 00418 * DCEP(LONG): mfc[4] - mfc[-4] 00419 */ 00420 w = mfc[2] + 1; /* +1 to skip C0 */ 00421 _w = mfc[-2] + 1; 00422 00423 f = feat[1]; 00424 for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */ 00425 f[i] = w[i] - _w[i]; 00426 00427 w = mfc[4] + 1; /* +1 to skip C0 */ 00428 _w = mfc[-4] + 1; 00429 00430 for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */ 00431 f[i] = w[j] - _w[j]; 00432 00433 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ 00434 w1 = mfc[3] + 1; /* Final +1 to skip C0 */ 00435 _w1 = mfc[-1] + 1; 00436 w_1 = mfc[1] + 1; 00437 _w_1 = mfc[-3] + 1; 00438 00439 f = feat[3]; 
00440 for (i = 0; i < feat_cepsize(fcb) - 1; i++) { 00441 d1 = w1[i] - _w1[i]; 00442 d2 = w_1[i] - _w_1[i]; 00443 00444 f[i] = d1 - d2; 00445 } 00446 00447 /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */ 00448 f = feat[2]; 00449 f[0] = mfc[0][0]; 00450 f[1] = mfc[2][0] - mfc[-2][0]; 00451 00452 d1 = mfc[3][0] - mfc[-1][0]; 00453 d2 = mfc[1][0] - mfc[-3][0]; 00454 f[2] = d1 - d2; 00455 } 00456 00457 00458 static void 00459 feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00460 { 00461 mfcc_t *f; 00462 mfcc_t *w, *_w; 00463 mfcc_t *w1, *w_1, *_w1, *_w_1; 00464 mfcc_t d1, d2; 00465 int32 i; 00466 00467 assert(fcb); 00468 assert(feat_cepsize(fcb) == 13); 00469 assert(feat_n_stream(fcb) == 1); 00470 assert(feat_stream_len(fcb, 0) == 39); 00471 assert(feat_window_size(fcb) == 3); 00472 00473 /* CEP; skip C0 */ 00474 memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); 00475 /* 00476 * DCEP: mfc[2] - mfc[-2]; 00477 */ 00478 f = feat[0] + feat_cepsize(fcb) - 1; 00479 w = mfc[2] + 1; /* +1 to skip C0 */ 00480 _w = mfc[-2] + 1; 00481 00482 for (i = 0; i < feat_cepsize(fcb) - 1; i++) 00483 f[i] = w[i] - _w[i]; 00484 00485 /* POW: C0, DC0, D2C0 */ 00486 f += feat_cepsize(fcb) - 1; 00487 00488 f[0] = mfc[0][0]; 00489 f[1] = mfc[2][0] - mfc[-2][0]; 00490 00491 d1 = mfc[3][0] - mfc[-1][0]; 00492 d2 = mfc[1][0] - mfc[-3][0]; 00493 f[2] = d1 - d2; 00494 00495 /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ 00496 f += 3; 00497 00498 w1 = mfc[3] + 1; /* Final +1 to skip C0 */ 00499 _w1 = mfc[-1] + 1; 00500 w_1 = mfc[1] + 1; 00501 _w_1 = mfc[-3] + 1; 00502 00503 for (i = 0; i < feat_cepsize(fcb) - 1; i++) { 00504 d1 = w1[i] - _w1[i]; 00505 d2 = w_1[i] - _w_1[i]; 00506 00507 f[i] = d1 - d2; 00508 } 00509 } 00510 00511 00512 static void 00513 feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00514 { 00515 assert(fcb); 00516 assert(feat_n_stream(fcb) == 1); 00517 assert(feat_window_size(fcb) == 0); 00518 00519 /* CEP 
*/ 00520 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00521 } 00522 00523 static void 00524 feat_s3_cepwin(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00525 { 00526 assert(fcb); 00527 assert(feat_n_stream(fcb) == 1); 00528 00529 /* CEP */ 00530 memcpy(feat[0], mfc[ -feat_window_size(fcb)], 00531 (1 + 2 * feat_window_size (fcb)) * feat_cepsize(fcb) * sizeof(mfcc_t)); 00532 } 00533 00534 00535 00536 static void 00537 feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00538 { 00539 mfcc_t *f; 00540 mfcc_t *w, *_w; 00541 int32 i; 00542 00543 assert(fcb); 00544 assert(feat_n_stream(fcb) == 1); 00545 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 2); 00546 assert(feat_window_size(fcb) == 2); 00547 00548 /* CEP */ 00549 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00550 00551 /* 00552 * DCEP: mfc[2] - mfc[-2]; 00553 */ 00554 f = feat[0] + feat_cepsize(fcb); 00555 w = mfc[2]; 00556 _w = mfc[-2]; 00557 00558 for (i = 0; i < feat_cepsize(fcb); i++) 00559 f[i] = w[i] - _w[i]; 00560 } 00561 00562 static void 00563 feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00564 { 00565 mfcc_t *f; 00566 mfcc_t *w, *_w; 00567 mfcc_t *w1, *w_1, *_w1, *_w_1; 00568 mfcc_t d1, d2; 00569 int32 i; 00570 00571 assert(fcb); 00572 assert(feat_n_stream(fcb) == 1); 00573 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 3); 00574 assert(feat_window_size(fcb) == FEAT_DCEP_WIN + 1); 00575 00576 /* CEP */ 00577 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00578 00579 /* 00580 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; 00581 */ 00582 f = feat[0] + feat_cepsize(fcb); 00583 w = mfc[FEAT_DCEP_WIN]; 00584 _w = mfc[-FEAT_DCEP_WIN]; 00585 00586 for (i = 0; i < feat_cepsize(fcb); i++) 00587 f[i] = w[i] - _w[i]; 00588 00589 /* 00590 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 00591 * where w = FEAT_DCEP_WIN 00592 */ 00593 f += feat_cepsize(fcb); 00594 00595 w1 = mfc[FEAT_DCEP_WIN + 1]; 00596 _w1 = 
mfc[-FEAT_DCEP_WIN + 1]; 00597 w_1 = mfc[FEAT_DCEP_WIN - 1]; 00598 _w_1 = mfc[-FEAT_DCEP_WIN - 1]; 00599 00600 for (i = 0; i < feat_cepsize(fcb); i++) { 00601 d1 = w1[i] - _w1[i]; 00602 d2 = w_1[i] - _w_1[i]; 00603 00604 f[i] = d1 - d2; 00605 } 00606 } 00607 00608 static void 00609 feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00610 { 00611 mfcc_t *f; 00612 mfcc_t *w, *_w; 00613 mfcc_t *w1, *w_1, *_w1, *_w_1; 00614 mfcc_t d1, d2; 00615 int32 i; 00616 00617 assert(fcb); 00618 assert(feat_n_stream(fcb) == 1); 00619 assert(feat_stream_len(fcb, 0) == feat_cepsize(fcb) * 4); 00620 assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2); 00621 00622 /* CEP */ 00623 memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); 00624 00625 /* 00626 * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; 00627 */ 00628 f = feat[0] + feat_cepsize(fcb); 00629 w = mfc[FEAT_DCEP_WIN]; 00630 _w = mfc[-FEAT_DCEP_WIN]; 00631 00632 for (i = 0; i < feat_cepsize(fcb); i++) 00633 f[i] = w[i] - _w[i]; 00634 00635 /* 00636 * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2; 00637 */ 00638 f += feat_cepsize(fcb); 00639 w = mfc[FEAT_DCEP_WIN * 2]; 00640 _w = mfc[-FEAT_DCEP_WIN * 2]; 00641 00642 for (i = 0; i < feat_cepsize(fcb); i++) 00643 f[i] = w[i] - _w[i]; 00644 00645 /* 00646 * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), 00647 * where w = FEAT_DCEP_WIN 00648 */ 00649 f += feat_cepsize(fcb); 00650 00651 w1 = mfc[FEAT_DCEP_WIN + 1]; 00652 _w1 = mfc[-FEAT_DCEP_WIN + 1]; 00653 w_1 = mfc[FEAT_DCEP_WIN - 1]; 00654 _w_1 = mfc[-FEAT_DCEP_WIN - 1]; 00655 00656 for (i = 0; i < feat_cepsize(fcb); i++) { 00657 d1 = w1[i] - _w1[i]; 00658 d2 = w_1[i] - _w_1[i]; 00659 00660 f[i] = d1 - d2; 00661 } 00662 } 00663 00664 static void 00665 feat_copy(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) 00666 { 00667 int32 win, i, j; 00668 00669 win = feat_window_size(fcb); 00670 00671 /* Concatenate input features */ 00672 for (i = -win; i <= win; ++i) { 00673 uint32 spos = 0; 
00674 00675 for (j = 0; j < feat_n_stream(fcb); ++j) { 00676 uint32 stream_len; 00677 00678 /* Unscale the stream length by the window. */ 00679 stream_len = feat_stream_len(fcb, j) / (2 * win + 1); 00680 memcpy(feat[j] + ((i + win) * stream_len), 00681 mfc[i] + spos, 00682 stream_len * sizeof(mfcc_t)); 00683 spos += stream_len; 00684 } 00685 } 00686 } 00687 00688 feat_t * 00689 feat_init(char const *type, cmn_type_t cmn, int32 varnorm, 00690 agc_type_t agc, int32 breport, int32 cepsize) 00691 { 00692 feat_t *fcb; 00693 00694 if (cepsize == 0) 00695 cepsize = 13; 00696 if (breport) 00697 E_INFO 00698 ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n", 00699 type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]); 00700 00701 fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t)); 00702 fcb->refcount = 1; 00703 fcb->name = (char *) ckd_salloc(type); 00704 if (strcmp(type, "s2_4x") == 0) { 00705 /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */ 00706 if (cepsize != 13) { 00707 E_ERROR("s2_4x features require cepsize == 13\n"); 00708 ckd_free(fcb); 00709 return NULL; 00710 } 00711 fcb->cepsize = 13; 00712 fcb->n_stream = 4; 00713 fcb->stream_len = (int32 *) ckd_calloc(4, sizeof(int32)); 00714 fcb->stream_len[0] = 12; 00715 fcb->stream_len[1] = 24; 00716 fcb->stream_len[2] = 3; 00717 fcb->stream_len[3] = 12; 00718 fcb->out_dim = 51; 00719 fcb->window_size = 4; 00720 fcb->compute_feat = feat_s2_4x_cep2feat; 00721 } 00722 else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) { 00723 /* 1-stream cep/dcep/pow/ddcep (Hack!! 
hardwired constants below) */ 00724 if (cepsize != 13) { 00725 E_ERROR("s2_4x features require cepsize == 13\n"); 00726 ckd_free(fcb); 00727 return NULL; 00728 } 00729 fcb->cepsize = 13; 00730 fcb->n_stream = 1; 00731 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00732 fcb->stream_len[0] = 39; 00733 fcb->out_dim = 39; 00734 fcb->window_size = 3; 00735 fcb->compute_feat = feat_s3_1x39_cep2feat; 00736 } 00737 else if (strncmp(type, "1s_c_d_dd", 9) == 0) { 00738 fcb->cepsize = cepsize; 00739 fcb->n_stream = 1; 00740 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00741 fcb->stream_len[0] = cepsize * 3; 00742 fcb->out_dim = cepsize * 3; 00743 fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */ 00744 fcb->compute_feat = feat_1s_c_d_dd_cep2feat; 00745 } 00746 else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) { 00747 fcb->cepsize = cepsize; 00748 fcb->n_stream = 1; 00749 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00750 fcb->stream_len[0] = cepsize * 4; 00751 fcb->out_dim = cepsize * 4; 00752 fcb->window_size = FEAT_DCEP_WIN * 2; 00753 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat; 00754 } 00755 else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) { 00756 /* 1-stream cep/dcep */ 00757 fcb->cepsize = cepsize; 00758 fcb->n_stream = 1; 00759 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00760 fcb->stream_len[0] = feat_cepsize(fcb) * 2; 00761 fcb->out_dim = fcb->stream_len[0]; 00762 fcb->window_size = 2; 00763 fcb->compute_feat = feat_s3_cep_dcep; 00764 } 00765 else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) { 00766 /* 1-stream cep */ 00767 fcb->cepsize = cepsize; 00768 fcb->n_stream = 1; 00769 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00770 fcb->stream_len[0] = feat_cepsize(fcb); 00771 fcb->out_dim = fcb->stream_len[0]; 00772 fcb->window_size = 0; 00773 fcb->compute_feat = feat_s3_cep; 00774 } 00775 else if (strncmp(type, "1s_3c", 5) == 0 || 
strncmp(type, "1s_4c", 5) == 0) { 00776 /* 1-stream cep with frames concatenated, so called cepwin features */ 00777 if (strncmp(type, "1s_3c", 5) == 0) 00778 fcb->window_size = 3; 00779 else 00780 fcb->window_size = 4; 00781 00782 fcb->cepsize = cepsize; 00783 fcb->n_stream = 1; 00784 fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32)); 00785 fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1); 00786 fcb->out_dim = fcb->stream_len[0]; 00787 fcb->compute_feat = feat_s3_cepwin; 00788 } 00789 else { 00790 int32 i, l, k; 00791 char *strp; 00792 char *mtype = ckd_salloc(type); 00793 char *wd = ckd_salloc(type); 00794 /* 00795 * Generic definition: Format should be %d,%d,%d,...,%d (i.e., 00796 * comma separated list of feature stream widths; #items = 00797 * #streams). An optional window size (frames will be 00798 * concatenated) is also allowed, which can be specified with 00799 * a colon after the list of feature streams. 00800 */ 00801 l = strlen(mtype); 00802 k = 0; 00803 for (i = 1; i < l - 1; i++) { 00804 if (mtype[i] == ',') { 00805 mtype[i] = ' '; 00806 k++; 00807 } 00808 else if (mtype[i] == ':') { 00809 mtype[i] = '\0'; 00810 fcb->window_size = atoi(mtype + i + 1); 00811 break; 00812 } 00813 } 00814 k++; /* Presumably there are (#commas+1) streams */ 00815 fcb->n_stream = k; 00816 fcb->stream_len = (int32 *) ckd_calloc(k, sizeof(int32)); 00817 00818 /* Scan individual feature stream lengths */ 00819 strp = mtype; 00820 i = 0; 00821 fcb->out_dim = 0; 00822 fcb->cepsize = 0; 00823 while (sscanf(strp, "%s%n", wd, &l) == 1) { 00824 strp += l; 00825 if ((i >= fcb->n_stream) 00826 || (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1) 00827 || (fcb->stream_len[i] <= 0)) 00828 E_FATAL("Bad feature type argument\n"); 00829 /* Input size before windowing */ 00830 fcb->cepsize += fcb->stream_len[i]; 00831 if (fcb->window_size > 0) 00832 fcb->stream_len[i] *= (fcb->window_size * 2 + 1); 00833 /* Output size after windowing */ 00834 fcb->out_dim += 
fcb->stream_len[i]; 00835 i++; 00836 } 00837 if (i != fcb->n_stream) 00838 E_FATAL("Bad feature type argument\n"); 00839 if (fcb->cepsize != cepsize) 00840 E_FATAL("Bad feature type argument\n"); 00841 00842 /* Input is already the feature stream */ 00843 fcb->compute_feat = feat_copy; 00844 ckd_free(mtype); 00845 ckd_free(wd); 00846 } 00847 00848 if (cmn != CMN_NONE) 00849 fcb->cmn_struct = cmn_init(feat_cepsize(fcb)); 00850 fcb->cmn = cmn; 00851 fcb->varnorm = varnorm; 00852 if (agc != AGC_NONE) { 00853 fcb->agc_struct = agc_init(); 00854 /* 00855 * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things 00856 * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY 00857 * switches to EMAX 00858 */ 00859 /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */ 00860 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0); 00861 } 00862 fcb->agc = agc; 00863 /* 00864 * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt() 00865 */ 00866 fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE, 00867 feat_cepsize(fcb), 00868 sizeof(mfcc_t)); 00869 /* This one is actually just an array of pointers to "flatten out" 00870 * wraparounds. 
*/ 00871 fcb->tmpcepbuf = ckd_calloc(2 * feat_window_size(fcb) + 1, 00872 sizeof(*fcb->tmpcepbuf)); 00873 00874 return fcb; 00875 } 00876 00877 00878 void 00879 feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp) 00880 { 00881 int32 i, j, k; 00882 00883 for (i = 0; i < nfr; i++) { 00884 fprintf(fp, "%8d:\n", i); 00885 00886 for (j = 0; j < feat_dimension1(fcb); j++) { 00887 fprintf(fp, "\t%2d:", j); 00888 00889 for (k = 0; k < feat_dimension2(fcb, j); k++) 00890 fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k])); 00891 fprintf(fp, "\n"); 00892 } 00893 } 00894 00895 fflush(fp); 00896 } 00897 00898 static void 00899 feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) 00900 { 00901 cmn_type_t cmn_type = fcb->cmn; 00902 00903 if (!(beginutt && endutt) 00904 && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */ 00905 cmn_type = CMN_PRIOR; 00906 00907 switch (cmn_type) { 00908 case CMN_CURRENT: 00909 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr); 00910 break; 00911 case CMN_PRIOR: 00912 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr); 00913 if (endutt) 00914 cmn_prior_update(fcb->cmn_struct); 00915 break; 00916 default: 00917 ; 00918 } 00919 cep_dump_dbg(fcb, mfc, nfr, "After CMN"); 00920 } 00921 00922 static void 00923 feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) 00924 { 00925 agc_type_t agc_type = fcb->agc; 00926 00927 if (!(beginutt && endutt) 00928 && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. 
*/ 00929 agc_type = AGC_EMAX; 00930 00931 switch (agc_type) { 00932 case AGC_MAX: 00933 agc_max(fcb->agc_struct, mfc, nfr); 00934 break; 00935 case AGC_EMAX: 00936 agc_emax(fcb->agc_struct, mfc, nfr); 00937 if (endutt) 00938 agc_emax_update(fcb->agc_struct); 00939 break; 00940 case AGC_NOISE: 00941 agc_noise(fcb->agc_struct, mfc, nfr); 00942 break; 00943 default: 00944 ; 00945 } 00946 cep_dump_dbg(fcb, mfc, nfr, "After AGC"); 00947 } 00948

/**
 * Compute feature vectors for a complete, already padded utterance.
 *
 * Each input frame i in [win, nfr - win) is handed to fcb->compute_feat()
 * and its result is written to feat[i - win], so nfr - 2 * win output
 * frames are produced.  Afterwards the LDA transform and the subvector
 * projection are applied when fcb->lda / fcb->subvecs are set.
 *
 * @param fcb  Feature control block.
 * @param mfc  Input cepstra (nfr frames, padding included).
 * @param nfr  Number of frames in mfc.
 * @param win  Number of frames skipped at each end of mfc.
 * @param feat Output buffer receiving nfr - 2 * win feature frames.
 */
static void
feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
{
    int32 i;

    cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)");

    /* Create feature vectors */
    for (i = win; i < nfr - win; i++) {
        fcb->compute_feat(fcb, mfc + i, feat[i - win]);
    }

    feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation");

    if (fcb->lda) {
        feat_lda_transform(fcb, feat, nfr - win * 2);
        feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA");
    }

    if (fcb->subvecs) {
        feat_subvec_project(fcb, feat, nfr - win * 2);
        feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection");
    }
}


/**
 * Read frames [sf..ef] of an MFC file, normalize them (CMN, then AGC) and
 * pad both ends with up to win replicated frames.
 *
 * The file starts with an int32 count of float32 values, followed by the
 * values themselves.  If the count does not match the file size it is
 * byte-swapped and checked again; when the swapped value matches, all data
 * read from the file is byte-swapped as well.
 *
 * @param fcb     Feature control block (passed to feat_cmn() / feat_agc()).
 * @param file    Path of the MFC file.
 * @param win     Number of context frames wanted before sf and after ef.
 *                Frames that exist in the file are read; missing ones are
 *                replicated from the first / last frame read.
 * @param sf      Start frame.
 * @param ef      End frame; a negative value means "last frame of the file".
 * @param out_mfc If non-NULL, receives a newly allocated 2-D array of
 *                cepstra (set to NULL on failure); the caller releases it
 *                with ckd_free_2d().  If NULL, only the frame count is
 *                computed and no frame data is read.
 * @param maxfr   If > 0, the maximum number of frames (padding included)
 *                that may be returned; exceeding it is an error.
 * @param cepsize Number of values per frame; must be > 0 (checked by the
 *                caller in this file).
 * @return Number of frames including padding, or -1 on error.
 */
static int32
feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win,
                         int32 sf, int32 ef,
                         mfcc_t ***out_mfc,
                         int32 maxfr,
                         int32 cepsize)
{
    FILE *fp;
    int32 n_float32;
    float32 *float_feat;
    struct stat statbuf;
    int32 i, n, byterev;
    int32 start_pad, end_pad;
    mfcc_t **mfc;

    /* Initialize the output pointer to NULL, so that any attempts to
       free() it if we fail before allocating it will not segfault! */
    if (out_mfc)
        *out_mfc = NULL;
    E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
    if (ef >= 0 && ef <= sf) {
        E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
        return -1;
    }

    /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */
    if ((stat_retry(file, &statbuf) < 0)
        || ((fp = fopen(file, "rb")) == NULL)) {
        E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno));
        return -1;
    }

    /* Read #floats in header */
    if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) {
        E_ERROR("%s: fread(#floats) failed\n", file);
        fclose(fp);
        return -1;
    }

    /* Check if n_float32 matches file size */
    byterev = 0;
    if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */
        n = n_float32;
        SWAP_INT32(&n);

        if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) {   /* RAH, typecast both sides to remove compile warning */
            /* st_size is an off_t, which need not have the width of int:
             * cast explicitly so the arguments match the conversions. */
            E_ERROR
                ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
                 file, n_float32, (unsigned int) n_float32,
                 (int) statbuf.st_size, (unsigned int) statbuf.st_size);
            fclose(fp);
            return -1;
        }

        n_float32 = n;
        byterev = 1;
    }
    if (n_float32 <= 0) {
        E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32);
        fclose(fp);
        return -1;
    }

    /* Convert n to #frames of input */
    n = n_float32 / cepsize;
    if (n * cepsize != n_float32) {
        E_ERROR("Header size field: %d; not multiple of %d\n", n_float32,
                cepsize);
        fclose(fp);
        return -1;
    }

    /* Check start and end frames */
    if (sf > 0) {
        if (sf >= n) {
            E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file,
                    sf, n);
            fclose(fp);
            return -1;
        }
    }
    if (ef < 0)
        ef = n-1;
    else if (ef >= n) {
        E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n",
               file, ef, n);
        ef = n-1;
    }

    /* Add window to start and end frames */
    sf -= win;
    ef += win;
    if (sf < 0) {
        start_pad = -sf;
        sf = 0;
    }
    else
        start_pad = 0;
    if (ef >= n) {
        end_pad = ef - n + 1;
        ef = n - 1;
    }
    else
        end_pad = 0;

    /* Limit n if indicated by [sf..ef] */
    if ((ef - sf + 1) < n)
        n = (ef - sf + 1);
    if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
        E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n",
                file, maxfr, n + start_pad + end_pad);
        fclose(fp);
        return -1;
    }

    /* If no output buffer was supplied, then skip the actual data reading. */
    if (out_mfc != NULL) {
        /* Position at desired start frame and read actual MFC data */
        mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t));
        if (sf > 0
            && fseek(fp, (long) (sf * cepsize * sizeof(float32)), SEEK_CUR) != 0) {
            /* A failed seek would otherwise silently return the wrong frames. */
            E_ERROR("%s: fseek to start frame (%d) failed: %s\n",
                    file, sf, strerror(errno));
            ckd_free_2d(mfc);
            fclose(fp);
            return -1;
        }
        n_float32 = n * cepsize;
#ifdef FIXED_POINT
        /* Read into a temporary float buffer, converted to mfcc_t below. */
        float_feat = ckd_calloc(n_float32, sizeof(float32));
#else
        /* Read straight into the output rows that follow the start padding. */
        float_feat = mfc[start_pad];
#endif
        if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) {
            E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
#ifdef FIXED_POINT
            /* The temporary buffer is separate from mfc here; don't leak it. */
            ckd_free(float_feat);
#endif
            ckd_free_2d(mfc);
            fclose(fp);
            return -1;
        }
        if (byterev) {
            for (i = 0; i < n_float32; i++) {
                SWAP_FLOAT32(&float_feat[i]);
            }
        }
#ifdef FIXED_POINT
        for (i = 0; i < n_float32; ++i) {
            mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
        }
        ckd_free(float_feat);
#endif

        /* Normalize */
        feat_cmn(fcb, mfc + start_pad, n, 1, 1);
        feat_agc(fcb, mfc + start_pad, n, 1, 1);

        /* Replicate start and end frames if necessary. */
        for (i = 0; i < start_pad; ++i)
            memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t));
        for (i = 0; i < end_pad; ++i)
            memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
                   cepsize * sizeof(mfcc_t));

        *out_mfc = mfc;
    }

    fclose(fp);
    return n + start_pad + end_pad;
}


/**
 * Read an MFC file and compute feature vectors for frames [sf..ef].
 *
 * The file name is built as dir + "/" + file + cepext; dir is omitted when
 * NULL, and cepext is omitted when NULL or when file already ends with it.
 *
 * @param fcb    Feature control block; fcb->cepsize must be > 0.
 * @param file   File name (must not be NULL).
 * @param dir    Directory to prepend, or NULL if file already holds the path.
 * @param cepext Extension to append, or NULL for none.
 * @param sf     Start frame.
 * @param ef     End frame; negative means up to the end of the file.
 * @param feat   Output feature buffer, or NULL to only compute the number
 *               of frames that would be produced.
 * @param maxfr  If >= 0, it is increased by twice the window size and passed
 *               on as the limit on frames read (padding included).
 * @return Number of feature frames (excluding the padding), or a negative
 *         value on error.
 */
int32
feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext,
                int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
{
    char *path;
    const char *ps = "/";
    int32 win, nfr;
    int32 file_length, cepext_length, path_length = 0;
    mfcc_t **mfc = NULL;

    if (fcb->cepsize <= 0) {
        E_ERROR("Bad cepsize: %d\n", fcb->cepsize);
        return -1;
    }

    /* strlen(NULL) below would be undefined behaviour. */
    if (file == NULL) {
        E_ERROR("No mfc file name given\n");
        return -1;
    }

    if (cepext == NULL)
        cepext = "";

    /*
     * Create mfc filename, combining file, dir and extension if
     * necessary
     */

    /*
     * First we decide about the path. If dir is defined, then use
     * it. Otherwise assume the filename already contains the path.
     */
    if (dir == NULL) {
        dir = "";
        ps = "";
        /*
         * This is not true but some 3rd party apps
         * may parse the output explicitly checking for this line
         */
        E_INFO("At directory . (current directory)\n");
    }
    else {
        E_INFO("At directory %s\n", dir);
        /*
         * Do not forget the path separator!
         */
        path_length += strlen(dir) + 1;
    }

    /*
     * Include cepext, if it's not already part of the filename.
     */
    file_length = strlen(file);
    cepext_length = strlen(cepext);
    if ((file_length > cepext_length)
        && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
        cepext = "";
        cepext_length = 0;
    }

    /*
     * Do not forget the '\0'
     */
    path_length += file_length + cepext_length + 1;
    path = (char*) ckd_calloc(path_length, sizeof(char));

#ifdef HAVE_SNPRINTF
    /*
     * Paranoia is our best friend...
     *
     * snprintf() returns the length the full string would have had,
     * excluding the terminating '\0'; a return value >= the buffer size
     * therefore means truncation, and the buffer has to grow to that
     * length plus one.
     */
    while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) >= path_length) {
        path_length = file_length + 1;
        path = (char*) ckd_realloc(path, path_length * sizeof(char));
    }
#else
    sprintf(path, "%s%s%s%s", dir, ps, file, cepext);
#endif

    win = feat_window_size(fcb);
    /* Pad maxfr with win, so we read enough raw feature data to
     * calculate the requisite number of dynamic features. */
    if (maxfr >= 0)
        maxfr += win * 2;

    if (feat != NULL) {
        /* Read mfc file including window or padding if necessary. */
        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
        ckd_free(path);
        if (nfr < 0) {
            ckd_free_2d((void **) mfc);
            return -1;
        }

        /* Actually compute the features */
        feat_compute_utt(fcb, mfc, nfr, win, feat);

        ckd_free_2d((void **) mfc);
    }
    else {
        /* Just calculate the number of frames we would need. */
        nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
        ckd_free(path);
        if (nfr < 0)
            return nfr;
    }


    return (nfr - win * 2);
}

/**
 * Compute features for a whole utterance held in memory.
 *
 * Builds a temporary array of frame pointers of length nfr + 2 * win: the
 * middle nfr entries point at the caller's frames, the leading and trailing
 * win entries point at rows of fcb->cepbuf that are filled with copies of
 * the first and last input frame.  CMN and AGC are applied to the caller's
 * frames (through the shared pointers) before the padding is copied.
 *
 * @param fcb    Feature control block.
 * @param uttcep Input cepstra, nfr frames (modified by normalization).
 * @param nfr    Number of input frames; the only caller in this file passes
 *               a value > 0.
 * @param ofeat  Output feature buffer, nfr frames.
 * @return nfr.
 */
static int32
feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep,
                          int32 nfr, mfcc_t *** ofeat)
{
    mfcc_t **cepbuf;
    int32 i, win, cepsize;

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Copy and pad out the utterance (this requires that the
     * feature computation functions always access the buffer via
     * the frame pointers, which they do) */
    cepbuf = ckd_calloc(nfr + win * 2, sizeof(mfcc_t *));
    memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *));

    /* Do normalization before we interpolate on the boundary */
    feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
    feat_agc(fcb, cepbuf + win, nfr, 1, 1);

    /* Now interpolate */
    for (i = 0; i < win; ++i) {
        /* Borrow rows of the live buffer as storage for the padding frames. */
        cepbuf[i] = fcb->cepbuf[i];
        memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t));
        cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
        memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * sizeof(mfcc_t));
    }
    /* Compute as usual. */
    feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
    /* Only the pointer array is ours; the frames belong to uttcep / fcb. */
    ckd_free(cepbuf);
    return nfr;
}

/**
 * Incrementally compute features from blocks of cepstra.
 *
 * Input frames are normalized in place, copied into the circular buffer
 * fcb->cepbuf (LIVEBUFBLOCKSIZE rows, write position fcb->bufpos, read
 * position fcb->curpos), and a feature vector is produced for every
 * buffered frame that has win frames of context on both sides.  At the
 * start of an utterance the first frame is replicated win times in front;
 * at the end the last buffered frame is replicated win times behind.
 *
 * @param fcb        Feature control block.
 * @param uttcep     Input cepstra (modified by CMN / AGC).
 * @param inout_ncep In: number of frames in uttcep.  Out: reduced if the
 *                   buffer could not take all of them (end-of-utterance
 *                   processing is then cancelled for this call).  May be
 *                   NULL, which is treated as zero input frames.
 * @param beginutt   Non-zero if this block starts an utterance.
 * @param endutt     Non-zero if this block ends an utterance.
 * @param ofeat      Output feature buffer.
 * @return Number of feature frames written to ofeat.
 */
int32
feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep,
                     int32 beginutt, int32 endutt, mfcc_t *** ofeat)
{
    int32 win, cepsize, nbufcep;
    int32 i, j, nfeatvec;
    int32 zero = 0;

    /* Avoid having to check this everywhere. */
    if (inout_ncep == NULL) inout_ncep = &zero;

    /* Special case for entire utterances. */
    if (beginutt && endutt && *inout_ncep > 0)
        return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);

    win = feat_window_size(fcb);
    cepsize = feat_cepsize(fcb);

    /* Empty the input buffer on start of utterance. */
    if (beginutt)
        fcb->bufpos = fcb->curpos;

    /* Calculate how much data is in the buffer already. */
    nbufcep = fcb->bufpos - fcb->curpos;
    if (nbufcep < 0)
        nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
    /* Add any data that we have to replicate. */
    if (beginutt && *inout_ncep > 0)
        nbufcep += win;
    if (endutt)
        nbufcep += win;

    /* Only consume as much input as will fit in the buffer. */
    if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
        /* We also can't overwrite the trailing window, hence the
         * reason why win is subtracted here. */
        *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
        /* Cancel end of utterance processing. */
        endutt = FALSE;
    }

    /* FIXME: Don't modify the input! */
    feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
    feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);

    /* Replicate first frame into the first win frames if we're at the
     * beginning of the utterance and there was some actual input to
     * deal with.  (FIXME: Not entirely sure why that condition) */
    if (beginutt && *inout_ncep > 0) {
        for (i = 0; i < win; i++) {
            memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
        /* Move the current pointer past this data. */
        fcb->curpos = fcb->bufpos;
        nbufcep -= win;
    }

    /* Copy in frame data to the circular buffer. */
    for (i = 0; i < *inout_ncep; ++i) {
        memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
               cepsize * sizeof(mfcc_t));
        fcb->bufpos %= LIVEBUFBLOCKSIZE;
        ++nbufcep;
    }

    /* Replicate last frame into the last win frames if we're at the
     * end of the utterance (even if there was no input, so we can
     * flush the output). */
    if (endutt) {
        int32 tpos; /* Index of last input frame. */
        if (fcb->bufpos == 0)
            tpos = LIVEBUFBLOCKSIZE - 1;
        else
            tpos = fcb->bufpos - 1;
        for (i = 0; i < win; ++i) {
            memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
                   cepsize * sizeof(mfcc_t));
            fcb->bufpos %= LIVEBUFBLOCKSIZE;
        }
    }

    /* We have to leave the trailing window of frames. */
    nfeatvec = nbufcep - win;
    if (nfeatvec <= 0)
        return 0; /* Do nothing. */

    for (i = 0; i < nfeatvec; ++i) {
        /* Handle wraparound cases. */
        if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
            /* Use tmpcepbuf for this case.  Actually, we just need the pointers. */
            for (j = -win; j <= win; ++j) {
                int32 tmppos =
                    (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
                fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
            }
            fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
        }
        else {
            fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
        }
        /* Move the read pointer forward. */
        ++fcb->curpos;
        fcb->curpos %= LIVEBUFBLOCKSIZE;
    }

    if (fcb->lda)
        feat_lda_transform(fcb, ofeat, nfeatvec);

    if (fcb->subvecs)
        feat_subvec_project(fcb, ofeat, nfeatvec);

    return nfeatvec;
}

/**
 * Take an additional reference to a feature control block.
 *
 * @param f Feature control block (must not be NULL).
 * @return f, with its reference count incremented.
 */
feat_t *
feat_retain(feat_t *f)
{
    ++f->refcount;
    return f;
}

/**
 * Drop one reference to a feature control block, freeing it together with
 * the buffers and sub-objects it holds once the count reaches zero.
 *
 * @param f Feature control block; NULL is accepted and ignored.
 * @return The remaining reference count if it is still positive, otherwise 0
 *         (also for a NULL argument).
 */
int
feat_free(feat_t * f)
{
    if (f == NULL)
        return 0;
    if (--f->refcount > 0)
        return f->refcount;

    if (f->cepbuf)
        ckd_free_2d((void **) f->cepbuf);
    ckd_free(f->tmpcepbuf);

    if (f->name) {
        ckd_free((void *) f->name);
    }
    if (f->lda)
        ckd_free_3d((void ***) f->lda);

    ckd_free(f->stream_len);
    ckd_free(f->sv_len);
    ckd_free(f->sv_buf);
    subvecs_free(f->subvecs);

    cmn_free(f->cmn_struct);
    agc_free(f->agc_struct);

    ckd_free(f);
    return 0;
}


/**
 * Log the configuration of a feature control block: name, cepstral size,
 * stream lengths, subvector components (each list ends at -1), and the
 * CMN / AGC / variance-normalization settings.
 *
 * @param f Feature control block to report on.
 */
void
feat_report(feat_t * f)
{
    int i;
    E_INFO_NOFN("Initialization of feat_t, report:\n");
    E_INFO_NOFN("Feature type         = %s\n", f->name);
    E_INFO_NOFN("Cepstral size        = %d\n", f->cepsize);
    E_INFO_NOFN("Number of streams    = %d\n", f->n_stream);
    for (i = 0; i < f->n_stream; i++) {
        E_INFO_NOFN("Vector size of stream[%d]: %d\n", i,
                    f->stream_len[i]);
    }
    E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv);
    for (i = 0; i < f->n_sv; i++) {
        int32 *sv;

        E_INFO_NOFN("Components of subvector[%d]:", i);
        for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
            E_INFOCONT(" %d", *sv);
        E_INFOCONT("\n");
    }
    E_INFO_NOFN("Whether CMN is used  = %d\n", f->cmn);
    E_INFO_NOFN("Whether AGC is used  = %d\n", f->agc);
    E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm);
    E_INFO_NOFN("\n");
}