126 #pragma warning (disable: 4244 4996)
129 #include "sphinxbase/fe.h"
140 #define FEAT_VERSION "1.0"
141 #define FEAT_DCEP_WIN 2
/* Debug helper: writes the cepstral coefficients of nfr frames to stderr.
 * NOTE(review): this listing omits lines between the numbered fragments
 * (return type, local declarations, closing braces, any use of the text
 * argument), so only the visible statements are described. */
145 cep_dump_dbg(
feat_t *fcb, mfcc_t **mfc, int32 nfr,
const char *text)
/* Outer loop: one pass per frame. */
150 for (i = 0; i < nfr; i++) {
/* Inner loop: fcb->cepsize values per frame, each converted with MFCC2FLOAT. */
151 for (j = 0; j < fcb->cepsize; j++) {
152 fprintf(stderr,
"%f ", MFCC2FLOAT(mfc[i][j]));
/* Newline on stderr; presumably ends the row of one frame (the brace that
 * closes the inner loop is not visible here). */
154 fprintf(stderr,
"\n");
/* Debug helper taking the feature control block, a three-level feature
 * array, a frame count and a label string. Only the signature is visible in
 * this listing; the body is not shown. */
158 feat_print_dbg(
feat_t *fcb, mfcc_t ***feat, int32 nfr,
const char *text)
/* No-op variants: with these definitions, calls to cep_dump_dbg() and
 * feat_print_dbg() expand to nothing.
 * NOTE(review): the preprocessor conditional that chooses between these
 * macros and the real functions is not visible in this listing. */
164 #define cep_dump_dbg(fcb,mfc,nfr,text)
165 #define feat_print_dbg(fcb,mfc,nfr,text)
185 if (sscanf(strp,
"%d%n", &n, &l) != 1)
186 E_FATAL(
"'%s': Couldn't read int32 @pos %d\n", str,
193 if (sscanf(strp,
"%d%n", &n2, &l) != 1)
194 E_FATAL(
"'%s': Couldn't read int32 @pos %d\n", str,
201 if ((n < 0) || (n > n2))
202 E_FATAL(
"'%s': Bad subrange spec ending @pos %d\n", str,
205 for (; n <= n2; n++) {
207 for (gn = dimlist; gn; gn = gnode_next(gn))
208 if (gnode_int32(gn) == n)
211 E_FATAL(
"'%s': Duplicate dimension ending @pos %d\n",
217 if ((*strp ==
'\0') || (*strp ==
'/'))
221 E_FATAL(
"'%s': Bad delimiter @pos %d\n", str, strp - str);
231 assert(*strp ==
'/');
237 subvec = (int32 **)
ckd_calloc(n + 1,
sizeof(int32 *));
240 for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) {
245 E_FATAL(
"'%s': 0-length subvector\n", str);
247 subvec[n] = (int32 *)
ckd_calloc(n2 + 1,
sizeof(int32));
250 for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2)
251 subvec[n][n2] = gnode_int32(gn2);
252 assert((n2 < 0) && (!gn2));
254 assert((n < 0) && (!gn));
257 for (gn = veclist; gn; gn = gnode_next(gn)) {
271 for (sv = subvecs; sv && *sv; ++sv)
280 int32 n_sv, n_dim, i;
282 if (subvecs == NULL) {
294 if (fcb->n_stream != 1) {
295 E_ERROR(
"Subvector specifications require single-stream features!");
301 for (sv = subvecs; sv && *sv; ++sv) {
304 for (d = *sv; d && *d != -1; ++d) {
310 E_ERROR(
"Total dimensionality of subvector specification %d "
316 fcb->subvecs = subvecs;
317 fcb->sv_len =
ckd_calloc(n_sv,
sizeof(*fcb->sv_len));
318 fcb->sv_buf =
ckd_calloc(n_dim,
sizeof(*fcb->sv_buf));
320 for (i = 0; i < n_sv; ++i) {
322 for (d = subvecs[i]; d && *d != -1; ++d) {
/* Rearranges stream 0 of each of the nfr frames according to the subvector
 * specification stored in fcb, in place.
 * NOTE(review): declarations, the initialisation of out and the closing
 * braces are missing from this listing. */
334 feat_subvec_project(
feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
/* Guard for the case where no subvectors are configured; the statement it
 * controls is not visible (presumably an early return). */
338 if (fcb->subvecs == NULL)
340 for (i = 0; i < nfr; ++i) {
/* Visit each of the fcb->n_sv subvectors in order. */
345 for (j = 0; j < fcb->n_sv; ++j) {
/* Each subvector is a list of dimension indices terminated by -1. */
347 for (d = fcb->subvecs[j]; d && *d != -1; ++d) {
/* Gather the selected dimension of stream 0 through the out cursor;
 * presumably out walks fcb->sv_buf -- TODO confirm. */
348 *out++ = inout_feat[i][0][*d];
/* Copy fcb->sv_dim values from the scratch buffer back over stream 0. */
351 memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim *
sizeof(*fcb->sv_buf));
359 mfcc_t *data, *d, ***feat;
368 for (i = 0; i < fcb->n_stream; ++i)
369 k += fcb->stream_len[i];
371 assert(k >= fcb->sv_dim);
375 data = (mfcc_t *)
ckd_calloc(nfr * k,
sizeof(mfcc_t));
377 for (i = 0; i < nfr; i++) {
/* Feature computation callback for one frame. mfc points at the current
 * frame inside a larger frame array: the visible code reads neighbours from
 * mfc[-3] up to mfc[3].
 * NOTE(review): most of the body (other streams, loops, declarations of f,
 * d1, d2, i) is missing from this listing. */
396 feat_s2_4x_cep2feat(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
400 mfcc_t *w1, *w_1, *_w1, *_w_1;
/* Stream 0: copy coefficients 1 .. cepsize-1 of the current frame, leaving
 * out coefficient 0. */
414 memcpy(feat[0], mfc[0] + 1, (
feat_cepsize(fcb) - 1) *
sizeof(mfcc_t));
/* Per-coefficient difference between two neighbouring frames; the
 * assignments of w_1 and _w_1 are not visible here. */
442 d2 = w_1[i] - _w_1[i];
/* Difference of coefficient 0 between frames +2 and -2. */
450 f[1] = mfc[2][0] - mfc[-2][0];
/* Two further coefficient-0 differences, taken one frame later (+3/-1) and
 * one frame earlier (+1/-3); how d1 and d2 are combined is not visible. */
452 d1 = mfc[3][0] - mfc[-1][0];
453 d2 = mfc[1][0] - mfc[-3][0];
/* Feature computation callback for one frame. The visible code reads
 * neighbouring frames from mfc[-3] up to mfc[3], so mfc must point inside a
 * frame array with at least three frames of context on each side.
 * NOTE(review): most of the body is missing from this listing. */
459 feat_s3_1x39_cep2feat(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
463 mfcc_t *w1, *w_1, *_w1, *_w_1;
/* Start of the output vector: coefficients 1 .. cepsize-1 of the current
 * frame (coefficient 0 is left out of this copy). */
474 memcpy(feat[0], mfc[0] + 1, (
feat_cepsize(fcb) - 1) *
sizeof(mfcc_t));
/* Difference of coefficient 0 between frames +2 and -2. */
489 f[1] = mfc[2][0] - mfc[-2][0];
/* Coefficient-0 differences one frame later (+3/-1) and one frame earlier
 * (+1/-3); how d1 and d2 are combined is not visible. */
491 d1 = mfc[3][0] - mfc[-1][0];
492 d2 = mfc[1][0] - mfc[-3][0];
/* Per-coefficient difference between two neighbouring frames; the
 * assignments of w_1 and _w_1 are not visible here. */
505 d2 = w_1[i] - _w_1[i];
/* Feature computation callback for one frame: the visible statement copies
 * all feat_cepsize(fcb) coefficients of the current frame into stream 0
 * unchanged. NOTE(review): surrounding lines are missing from this listing. */
513 feat_s3_cep(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
520 memcpy(feat[0], mfc[0],
feat_cepsize(fcb) *
sizeof(mfcc_t));
/* Feature computation callback with the same signature as the other
 * cep2feat routines. Only the signature is visible in this listing; the body
 * is not shown. */
524 feat_s3_cepwin(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
/* Feature computation callback for one frame. The visible statement copies
 * the feat_cepsize(fcb) coefficients of the current frame to the start of
 * stream 0. NOTE(review): the remainder of the body (presumably the delta
 * part suggested by the name) is missing from this listing. */
537 feat_s3_cep_dcep(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
549 memcpy(feat[0], mfc[0],
feat_cepsize(fcb) *
sizeof(mfcc_t));
/* Feature computation callback for one frame. With FEAT_DCEP_WIN defined as
 * 2, the visible code reads frames mfc[-3] through mfc[3], so mfc must point
 * inside a frame array with three frames of context on each side.
 * NOTE(review): loops, output pointers and the declarations of w, _w, d2 and
 * i are missing from this listing. */
563 feat_1s_c_d_dd_cep2feat(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
567 mfcc_t *w1, *w_1, *_w1, *_w_1;
/* Start of the output vector: the unmodified coefficients of this frame. */
577 memcpy(feat[0], mfc[0],
feat_cepsize(fcb) *
sizeof(mfcc_t));
/* Frames FEAT_DCEP_WIN ahead of and behind the current frame. */
583 w = mfc[FEAT_DCEP_WIN];
584 _w = mfc[-FEAT_DCEP_WIN];
/* The same pair of frames shifted one frame later (w1, _w1) and one frame
 * earlier (w_1, _w_1). */
595 w1 = mfc[FEAT_DCEP_WIN + 1];
596 _w1 = mfc[-FEAT_DCEP_WIN + 1];
597 w_1 = mfc[FEAT_DCEP_WIN - 1];
598 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
/* Difference for the earlier-shifted pair; the matching d1 computation and
 * their combination are not visible. */
602 d2 = w_1[i] - _w_1[i];
/* Feature computation callback for one frame. With FEAT_DCEP_WIN defined as
 * 2, the visible code reads frames from mfc[-4] through mfc[4], so mfc must
 * point inside a frame array with four frames of context on each side.
 * NOTE(review): loops, output pointers and the declarations of w, _w, d2 and
 * i are missing from this listing. */
609 feat_1s_c_d_ld_dd_cep2feat(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
613 mfcc_t *w1, *w_1, *_w1, *_w_1;
/* Start of the output vector: the unmodified coefficients of this frame. */
623 memcpy(feat[0], mfc[0],
feat_cepsize(fcb) *
sizeof(mfcc_t));
/* Short window: frames FEAT_DCEP_WIN ahead of and behind the current one. */
629 w = mfc[FEAT_DCEP_WIN];
630 _w = mfc[-FEAT_DCEP_WIN];
/* Long window: the same pointers are reassigned to frames twice as far away. */
639 w = mfc[FEAT_DCEP_WIN * 2];
640 _w = mfc[-FEAT_DCEP_WIN * 2];
/* Short-window pair shifted one frame later (w1, _w1) and one frame earlier
 * (w_1, _w_1). */
651 w1 = mfc[FEAT_DCEP_WIN + 1];
652 _w1 = mfc[-FEAT_DCEP_WIN + 1];
653 w_1 = mfc[FEAT_DCEP_WIN - 1];
654 _w_1 = mfc[-FEAT_DCEP_WIN - 1];
/* Difference for the earlier-shifted pair; the matching d1 computation and
 * their combination are not visible. */
658 d2 = w_1[i] - _w_1[i];
/* Feature computation callback that copies a window of input frames into
 * the output streams.
 * NOTE(review): declarations, the loop over j, the second argument of the
 * memcpy (the source pointer) and the closing braces are missing from this
 * listing. */
665 feat_copy(
feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat)
/* Walk the frames from win behind to win ahead of the current frame. */
672 for (i = -win; i <= win; ++i) {
/* Frame offset i lands in slot (i + win) of stream j, each slot being
 * stream_len values wide. */
680 memcpy(feat[j] + ((i + win) * stream_len),
682 stream_len *
sizeof(mfcc_t));
698 (
"Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
704 if (strcmp(type,
"s2_4x") == 0) {
707 E_ERROR(
"s2_4x features require cepsize == 13\n");
713 fcb->stream_len = (int32 *)
ckd_calloc(4,
sizeof(int32));
714 fcb->stream_len[0] = 12;
715 fcb->stream_len[1] = 24;
716 fcb->stream_len[2] = 3;
717 fcb->stream_len[3] = 12;
719 fcb->window_size = 4;
720 fcb->compute_feat = feat_s2_4x_cep2feat;
/* Feature types "s3_1x39" and "1s_12c_12d_3p_12dd": a single stream of 39
 * values computed by feat_s3_1x39_cep2feat with a window of 3 frames.
 * NOTE(review): the condition guarding the E_ERROR call and the rest of the
 * branch are not visible in this listing. */
722 else if ((strcmp(type,
"s3_1x39") == 0) || (strcmp(type,
"1s_12c_12d_3p_12dd") == 0)) {
/* The message previously named s2_4x (copied from the branch above); it now
 * names the feature type this branch actually handles. */
725 E_ERROR(
"s3_1x39 features require cepsize == 13\n");
731 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
732 fcb->stream_len[0] = 39;
734 fcb->window_size = 3;
735 fcb->compute_feat = feat_s3_1x39_cep2feat;
737 else if (strncmp(type,
"1s_c_d_dd", 9) == 0) {
738 fcb->cepsize = cepsize;
740 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
741 fcb->stream_len[0] = cepsize * 3;
742 fcb->out_dim = cepsize * 3;
743 fcb->window_size = FEAT_DCEP_WIN + 1;
744 fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
746 else if (strncmp(type,
"1s_c_d_ld_dd", 12) == 0) {
747 fcb->cepsize = cepsize;
749 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
750 fcb->stream_len[0] = cepsize * 4;
751 fcb->out_dim = cepsize * 4;
752 fcb->window_size = FEAT_DCEP_WIN * 2;
753 fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
755 else if (strncmp(type,
"cep_dcep", 8) == 0 || strncmp(type,
"1s_c_d", 6) == 0) {
757 fcb->cepsize = cepsize;
759 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
761 fcb->out_dim = fcb->stream_len[0];
762 fcb->window_size = 2;
763 fcb->compute_feat = feat_s3_cep_dcep;
765 else if (strncmp(type,
"cep", 3) == 0 || strncmp(type,
"1s_c", 4) == 0) {
767 fcb->cepsize = cepsize;
769 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
771 fcb->out_dim = fcb->stream_len[0];
772 fcb->window_size = 0;
773 fcb->compute_feat = feat_s3_cep;
775 else if (strncmp(type,
"1s_3c", 5) == 0 || strncmp(type,
"1s_4c", 5) == 0) {
777 if (strncmp(type,
"1s_3c", 5) == 0)
778 fcb->window_size = 3;
780 fcb->window_size = 4;
782 fcb->cepsize = cepsize;
784 fcb->stream_len = (int32 *)
ckd_calloc(1,
sizeof(int32));
785 fcb->stream_len[0] =
feat_cepsize(fcb) * (2 * fcb->window_size + 1);
786 fcb->out_dim = fcb->stream_len[0];
787 fcb->compute_feat = feat_s3_cepwin;
803 for (i = 1; i < l - 1; i++) {
804 if (mtype[i] ==
',') {
808 else if (mtype[i] ==
':') {
810 fcb->window_size = atoi(mtype + i + 1);
816 fcb->stream_len = (int32 *)
ckd_calloc(k,
sizeof(int32));
823 while (sscanf(strp,
"%s%n", wd, &l) == 1) {
825 if ((i >= fcb->n_stream)
826 || (sscanf(wd,
"%d", &(fcb->stream_len[i])) != 1)
827 || (fcb->stream_len[i] <= 0))
828 E_FATAL(
"Bad feature type argument\n");
830 fcb->cepsize += fcb->stream_len[i];
831 if (fcb->window_size > 0)
832 fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
834 fcb->out_dim += fcb->stream_len[i];
837 if (i != fcb->n_stream)
838 E_FATAL(
"Bad feature type argument\n");
839 if (fcb->cepsize != cepsize)
840 E_FATAL(
"Bad feature type argument\n");
843 fcb->compute_feat = feat_copy;
851 fcb->varnorm = varnorm;
852 if (agc != AGC_NONE) {
860 agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
872 sizeof(*fcb->tmpcepbuf));
883 for (i = 0; i < nfr; i++) {
884 fprintf(fp,
"%8d:\n", i);
887 fprintf(fp,
"\t%2d:", j);
890 fprintf(fp,
" %8.4f", MFCC2FLOAT(feat[i][j][k]));
/* Applies cepstral mean normalisation to nfr frames in mfc, choosing the
 * variant from the configured type and the utterance boundary flags.
 * NOTE(review): the declaration and initialisation of cmn_type and the
 * construct that selects between the two calls below (presumably a switch on
 * cmn_type) are missing from this listing. */
899 feat_cmn(
feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
/* Unless the block is a complete utterance (both flags set), any enabled
 * CMN is replaced by the prior-based variant. */
903 if (!(beginutt && endutt)
904 && cmn_type != CMN_NONE)
905 cmn_type = CMN_PRIOR;
/* Both variants receive the same normaliser state and the varnorm flag. */
909 cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
912 cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
/* Debug dump of the normalised frames (expands to nothing when the no-op
 * macro is in effect). */
919 cep_dump_dbg(fcb, mfc, nfr,
"After CMN");
/* Applies automatic gain control to nfr frames in mfc.
 * NOTE(review): the declaration of agc_type, the statement controlled by the
 * condition below, and the construct selecting between agc_max and agc_emax
 * are missing from this listing. */
923 feat_agc(
feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
/* True when AGC is enabled but the block is not a complete utterance; the
 * resulting assignment is not visible here. */
927 if (!(beginutt && endutt)
928 && agc_type != AGC_NONE)
/* The two visible AGC variants, both operating on fcb->agc_struct. */
933 agc_max(fcb->agc_struct, mfc, nfr);
936 agc_emax(fcb->agc_struct, mfc, nfr);
/* Debug dump of the gain-adjusted frames. */
946 cep_dump_dbg(fcb, mfc, nfr,
"After AGC");
/* Computes output features for a padded block of nfr input frames. The first
 * and last win frames act as context only, so nfr - win * 2 feature frames
 * are produced.
 * NOTE(review): declarations, closing braces and the statements between the
 * debug calls (for example whatever precedes the After LDA dump) are missing
 * from this listing. */
950 feat_compute_utt(
feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat)
954 cep_dump_dbg(fcb, mfc, nfr,
"Incoming features (after padding)");
/* Input frame i yields output frame i - win; the callback is handed a
 * pointer to frame i so that it can index its neighbours relative to it. */
957 for (i = win; i < nfr - win; i++) {
958 fcb->compute_feat(fcb, mfc + i, feat[i - win]);
961 feat_print_dbg(fcb, feat, nfr - win * 2,
"After dynamic feature computation");
965 feat_print_dbg(fcb, feat, nfr - win * 2,
"After LDA");
/* Subvector projection runs over the computed feature frames only. */
969 feat_subvec_project(fcb, feat, nfr - win * 2);
970 feat_print_dbg(fcb, feat, nfr - win * 2,
"After subvector projection");
/* Reads cepstral frames from an MFC file, validates the header against the
 * file size, applies CMN and AGC to the frames read, replicates the edge
 * frames as padding, and returns the frame count including padding
 * (n + start_pad + end_pad).
 * NOTE(review): this listing omits many lines: the remaining parameters (sf,
 * ef, out_mfc, maxfr and cepsize are used below), local declarations, the
 * error returns, the computation of start_pad and most closing braces. The
 * comments describe only the visible statements. */
988 feat_s2mfc_read_norm_pad(
feat_t *fcb,
char *file, int32 win,
999 int32 start_pad, end_pad;
1006 E_INFO(
"Reading mfc file: '%s'[%d..%d]\n", file, sf, ef);
/* An explicit end frame (ef >= 0) must come after the start frame. */
1007 if (ef >= 0 && ef <= sf) {
1008 E_ERROR(
"%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf);
/* Second half of a condition whose first half is not visible: open the file
 * for binary reading. */
1014 || ((fp = fopen(file,
"rb")) == NULL)) {
1015 E_ERROR(
"Failed to open file '%s' for reading: %s\n", file, strerror(errno));
/* The file begins with one 32-bit value giving the number of floats. */
1020 if (
fread_retry(&n_float32,
sizeof(int32), 1, fp) != 1) {
1021 E_ERROR(
"%s: fread(#floats) failed\n", file);
/* Header count times sizeof(float32), plus 4 bytes for the header itself,
 * is compared with the file size in statbuf. */
1028 if ((int32) (n_float32 *
sizeof(float32) + 4) != (int32) statbuf.st_size) {
/* Same comparison with n; presumably n holds the byte-swapped count at this
 * point (its assignment is not visible) -- TODO confirm. */
1032 if ((int32) (n *
sizeof(float32) + 4) != (int32) (statbuf.st_size)) {
1034 (
"%s: Header size field: %d(%08x); filesize: %d(%08x)\n",
1035 file, n_float32, n_float32, statbuf.st_size,
/* A non-positive float count is rejected. */
1044 if (n_float32 <= 0) {
1045 E_ERROR(
"%s: Header size field (#floats) = %d\n", file, n_float32);
/* Frames in the file; the float count must be an exact multiple of cepsize. */
1051 n = n_float32 / cepsize;
1052 if (n * cepsize != n_float32) {
1053 E_ERROR(
"Header size field: %d; not multiple of %d\n", n_float32,
1062 E_ERROR(
"%s: Start frame (%d) beyond file size (%d)\n", file,
1071 E_WARN(
"%s: End frame (%d) beyond file size (%d), will truncate\n",
/* Trailing padding: how far ef extends past the last frame index n - 1. The
 * condition guarding this assignment is not visible. */
1086 end_pad = ef - n + 1;
1093 if ((ef - sf + 1) < n)
/* When maxfr is positive it caps the padded frame count. */
1095 if (maxfr > 0 && n + start_pad + end_pad > maxfr) {
1096 E_ERROR(
"%s: Maximum output size(%d frames) < actual #frames(%d)\n",
1097 file, maxfr, n + start_pad + end_pad);
/* Frame data is loaded only when the caller passed a non-NULL out_mfc. */
1103 if (out_mfc != NULL) {
/* One row of cepsize values per frame, padding rows included. */
1105 mfc = (mfcc_t **)
ckd_calloc_2d(n + start_pad + end_pad, cepsize,
sizeof(mfcc_t));
/* Skip the first sf frames, relative to the current file position. */
1107 fseek(fp, sf * cepsize *
sizeof(float32), SEEK_CUR);
1108 n_float32 = n * cepsize;
/* Two alternative read targets: a separate float buffer, or the frame array
 * itself starting at row start_pad. The conditional that selects one of
 * them is not visible (presumably a build-time choice) -- TODO confirm. */
1110 float_feat =
ckd_calloc(n_float32,
sizeof(float32));
1112 float_feat = mfc[start_pad];
1114 if (
fread_retry(float_feat,
sizeof(float32), n_float32, fp) != n_float32) {
1115 E_ERROR(
"%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize);
/* Swap every float that was read; the condition enabling this loop is not
 * visible. */
1121 for (i = 0; i < n_float32; i++) {
1122 SWAP_FLOAT32(&float_feat[i]);
/* Convert each float to mfcc_t. The index runs over all n * cepsize values
 * from row start_pad onward, which assumes the rows returned by
 * ckd_calloc_2d are contiguous -- TODO confirm. */
1126 for (i = 0; i < n_float32; ++i) {
1127 mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]);
/* Normalise the n frames read from the file, with both utterance flags set. */
1133 feat_cmn(fcb, mfc + start_pad, n, 1, 1);
1134 feat_agc(fcb, mfc + start_pad, n, 1, 1);
/* Leading padding: copies of the first real frame. */
1137 for (i = 0; i < start_pad; ++i)
1138 memcpy(mfc[i], mfc[start_pad], cepsize *
sizeof(mfcc_t));
/* Trailing padding: copies of the last real frame. */
1139 for (i = 0; i < end_pad; ++i)
1140 memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1],
1141 cepsize *
sizeof(mfcc_t));
1147 return n + start_pad + end_pad;
1154 int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr)
1159 int32 file_length, cepext_length, path_length = 0;
1162 if (fcb->cepsize <= 0) {
1163 E_ERROR(
"Bad cepsize: %d\n", fcb->cepsize);
1186 E_INFO(
"At directory . (current directory)\n");
1189 E_INFO(
"At directory %s\n", dir);
1193 path_length += strlen(dir) + 1;
1199 file_length = strlen(file);
1200 cepext_length = strlen(cepext);
1201 if ((file_length > cepext_length)
1202 && (strcmp(file + file_length - cepext_length, cepext) == 0)) {
1210 path_length += file_length + cepext_length + 1;
1211 path = (
char*)
ckd_calloc(path_length,
sizeof(
char));
1213 #ifdef HAVE_SNPRINTF
/* Format the full path, growing the buffer until it fits. snprintf returns
 * the length the complete string would have, not counting the terminating
 * NUL, so the output was truncated whenever that value is >= the buffer size
 * (the earlier test used >, which accepted a result cut short by one
 * character). The regrown buffer gets one extra byte for the NUL. */
1217 while ((file_length = snprintf(path, path_length,
"%s%s%s%s", dir, ps, file, cepext)) >= path_length) {
1218 path_length = file_length + 1;
1219 path = (
char*)
ckd_realloc(path, path_length *
sizeof(
char));
1222 sprintf(path,
"%s%s%s%s", dir, ps, file, cepext);
1233 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize);
1241 feat_compute_utt(fcb, mfc, nfr, win, feat);
1247 nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize);
1254 return (nfr - win * 2);
/* Computes features for a complete utterance of nfr frames held in uttcep,
 * writing them to ofeat. A temporary array of row pointers is built with win
 * padding rows on each side of the utterance.
 * NOTE(review): the assignments of win and cepsize, the declaration of
 * cepbuf, the release of the temporary array and the return statement are
 * missing from this listing. */
1258 feat_s2mfc2feat_block_utt(
feat_t * fcb, mfcc_t ** uttcep,
1259 int32 nfr, mfcc_t *** ofeat)
1262 int32 i, win, cepsize;
/* Pointer array with room for nfr frames plus win rows before and after. */
1270 cepbuf =
ckd_calloc(nfr + win * 2,
sizeof(mfcc_t *));
/* Only the row pointers are copied, so the middle of cepbuf refers to the
 * same frame storage as uttcep. */
1271 memcpy(cepbuf + win, uttcep, nfr *
sizeof(mfcc_t *));
/* Normalise the real frames as a full utterance (both flags set). Because
 * the rows are shared, the frames in uttcep are the ones being modified. */
1274 feat_cmn(fcb, cepbuf + win, nfr, 1, 1);
1275 feat_agc(fcb, cepbuf + win, nfr, 1, 1);
/* Padding rows borrow storage from fcb->cepbuf: rows 0 .. win-1 receive a
 * copy of the first frame, rows win .. 2*win-1 a copy of the last frame. */
1278 for (i = 0; i < win; ++i) {
1279 cepbuf[i] = fcb->cepbuf[i];
1280 memcpy(cepbuf[i], uttcep[0], cepsize *
sizeof(mfcc_t));
1281 cepbuf[nfr + win + i] = fcb->cepbuf[win + i];
1282 memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize *
sizeof(mfcc_t));
/* Compute features over the padded block. */
1285 feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat);
1292 int32 beginutt, int32 endutt, mfcc_t *** ofeat)
1294 int32 win, cepsize, nbufcep;
1295 int32 i, j, nfeatvec;
1299 if (inout_ncep == NULL) inout_ncep = &zero;
1302 if (beginutt && endutt && *inout_ncep > 0)
1303 return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat);
1310 fcb->bufpos = fcb->curpos;
1313 nbufcep = fcb->bufpos - fcb->curpos;
1315 nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos;
1317 if (beginutt && *inout_ncep > 0)
1323 if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) {
1326 *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win;
1332 feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt);
1333 feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt);
1338 if (beginutt && *inout_ncep > 0) {
1339 for (i = 0; i < win; i++) {
1340 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0],
1341 cepsize *
sizeof(mfcc_t));
1342 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1345 fcb->curpos = fcb->bufpos;
1350 for (i = 0; i < *inout_ncep; ++i) {
1351 memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i],
1352 cepsize *
sizeof(mfcc_t));
1353 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1362 if (fcb->bufpos == 0)
1363 tpos = LIVEBUFBLOCKSIZE - 1;
1365 tpos = fcb->bufpos - 1;
1366 for (i = 0; i < win; ++i) {
1367 memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos],
1368 cepsize *
sizeof(mfcc_t));
1369 fcb->bufpos %= LIVEBUFBLOCKSIZE;
1374 nfeatvec = nbufcep - win;
1378 for (i = 0; i < nfeatvec; ++i) {
1380 if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) {
1382 for (j = -win; j <= win; ++j) {
1384 (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE;
1385 fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos];
1387 fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]);
1390 fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]);
1394 fcb->curpos %= LIVEBUFBLOCKSIZE;
1401 feat_subvec_project(fcb, ofeat, nfeatvec);
1418 if (--f->refcount > 0)
1436 cmn_free(f->cmn_struct);
1448 E_INFO_NOFN(
"Initialization of feat_t, report:\n");
1451 E_INFO_NOFN(
"Number of streams = %d\n", f->n_stream);
1452 for (i = 0; i < f->n_stream; i++) {
1456 E_INFO_NOFN(
"Number of subvectors = %d\n", f->n_sv);
1457 for (i = 0; i < f->n_sv; i++) {
1461 for (sv = f->subvecs[i]; sv && *sv != -1; ++sv)
1465 E_INFO_NOFN(
"Whether CMN is used = %d\n", f->cmn);
1466 E_INFO_NOFN(
"Whether AGC is used = %d\n", f->agc);
1467 E_INFO_NOFN(
"Whether variance is normalized = %d\n", f->varnorm);