00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00042
00043 #include <string.h>
00044 #include <assert.h>
00045
00046
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050
00051
00052 #include "ngram_search_fwdtree.h"
00053 #include "phone_loop_search.h"
00054
00055
00056 #define __CHAN_DUMP__ 0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062
00063
00064
00065
00066
00067 static void
00068 init_search_tree(ngram_search_t *ngs)
00069 {
00070 int32 w, ndiph, i, n_words, n_ci;
00071 dict_t *dict = ps_search_dict(ngs);
00072 bitvec_t *dimap;
00073
00074 n_words = ps_search_n_words(ngs);
00075 ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
00076
00077
00078 ndiph = 0;
00079 ngs->n_1ph_words = 0;
00080 n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
00081
00082 dimap = bitvec_alloc(n_ci * n_ci);
00083 for (w = 0; w < n_words; w++) {
00084 if (!dict_real_word(dict, w))
00085 continue;
00086 if (dict_is_single_phone(dict, w))
00087 ++ngs->n_1ph_words;
00088 else {
00089 int ph0, ph1;
00090 ph0 = dict_first_phone(dict, w);
00091 ph1 = dict_second_phone(dict, w);
00092
00093 if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
00094 bitvec_set(dimap, ph0 * n_ci + ph1);
00095 ++ndiph;
00096 }
00097 }
00098 }
00099 E_INFO("%d unique initial diphones\n", ndiph);
00100 bitvec_free(dimap);
00101
00102
00103 ngs->n_1ph_words += dict_num_fillers(dict) + 2;
00104 ngs->n_root_chan_alloc = ndiph + 1;
00105
00106
00107 for (w = 0; w < n_words; ++w) {
00108 if (dict_real_word(dict, w))
00109 continue;
00110 if (!dict_is_single_phone(dict, w)) {
00111 E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
00112 w, dict_wordstr(dict, w));
00113 --ngs->n_1ph_words;
00114 }
00115 }
00116
00117
00118 ngs->root_chan =
00119 ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
00120 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00121 hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
00122 ngs->root_chan[i].penult_phn_wid = -1;
00123 ngs->root_chan[i].next = NULL;
00124 }
00125
00126
00127
00128 ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00129 i = 0;
00130 for (w = 0; w < n_words; w++) {
00131 if (!dict_is_single_phone(dict, w))
00132 continue;
00133
00134 ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00135 ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
00136 hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00137 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
00138 ngs->rhmm_1ph[i].ciphone);
00139 ngs->rhmm_1ph[i].next = NULL;
00140
00141 ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00142 i++;
00143 }
00144
00145 ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00146 sizeof(*ngs->single_phone_wid));
00147 E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00148 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00149 }
00150
00151
00152
00153
00154 static void
00155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci)
00156 {
00157 hmm->next = NULL;
00158 hmm->alt = NULL;
00159 hmm->info.penult_phn_wid = -1;
00160 hmm->ciphone = ci;
00161 hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, ci);
00162 }
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174 static void
00175 create_search_tree(ngram_search_t *ngs)
00176 {
00177 chan_t *hmm;
00178 root_chan_t *rhmm;
00179 int32 w, i, j, p, ph;
00180 int32 n_words;
00181 dict_t *dict = ps_search_dict(ngs);
00182 dict2pid_t *d2p = ps_search_dict2pid(ngs);
00183
00184 n_words = ps_search_n_words(ngs);
00185
00186 E_INFO("Creating search tree\n");
00187
00188 for (w = 0; w < n_words; w++)
00189 ngs->homophone_set[w] = -1;
00190
00191 E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
00192 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00193
00194 ngs->n_1ph_LMwords = 0;
00195 ngs->n_root_chan = 0;
00196 ngs->n_nonroot_chan = 0;
00197
00198 for (w = 0; w < n_words; w++) {
00199 int ciphone, ci2phone;
00200
00201
00202 if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
00203 continue;
00204
00205
00206 if (dict_is_single_phone(dict, w)) {
00207 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00208 ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
00209 ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
00210 continue;
00211 }
00212
00213
00214
00215 ciphone = dict_first_phone(dict, w);
00216 ci2phone = dict_second_phone(dict, w);
00217 for (i = 0; i < ngs->n_root_chan; ++i) {
00218 if (ngs->root_chan[i].ciphone == ciphone
00219 && ngs->root_chan[i].ci2phone == ci2phone)
00220 break;
00221 }
00222 if (i == ngs->n_root_chan) {
00223 rhmm = &(ngs->root_chan[ngs->n_root_chan]);
00224 rhmm->hmm.tmatid = ciphone;
00225
00226 hmm_mpx_ssid(&rhmm->hmm, 0) =
00227 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
00228 rhmm->ciphone = ciphone;
00229 rhmm->ci2phone = ci2phone;
00230 ngs->n_root_chan++;
00231 }
00232 else
00233 rhmm = &(ngs->root_chan[i]);
00234
00235 E_DEBUG(2, ("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
00236
00237 if (dict_pronlen(dict, w) == 2) {
00238
00239 if ((j = rhmm->penult_phn_wid) < 0)
00240 rhmm->penult_phn_wid = w;
00241 else {
00242 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00243 ngs->homophone_set[j] = w;
00244 }
00245 }
00246 else {
00247
00248 ph = dict2pid_internal(d2p, w, 1);
00249 hmm = rhmm->next;
00250 if (hmm == NULL) {
00251 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
00252 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1));
00253 ngs->n_nonroot_chan++;
00254 }
00255 else {
00256 chan_t *prev_hmm = NULL;
00257
00258 for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
00259 prev_hmm = hmm;
00260 if (!hmm) {
00261 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00262 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1));
00263 ngs->n_nonroot_chan++;
00264 }
00265 }
00266 E_DEBUG(3,("phone %s = %d\n",
00267 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00268 dict_second_phone(dict, w)), ph));
00269 for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
00270 ph = dict2pid_internal(d2p, w, p);
00271 if (!hmm->next) {
00272 hmm->next = listelem_malloc(ngs->chan_alloc);
00273 hmm = hmm->next;
00274 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p));
00275 ngs->n_nonroot_chan++;
00276 }
00277 else {
00278 chan_t *prev_hmm = NULL;
00279
00280 for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
00281 hmm = hmm->alt)
00282 prev_hmm = hmm;
00283 if (!hmm) {
00284 prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00285 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p));
00286 ngs->n_nonroot_chan++;
00287 }
00288 }
00289 E_DEBUG(3,("phone %s = %d\n",
00290 bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00291 dict_pron(dict, w, p)), ph));
00292 }
00293
00294
00295 if ((j = hmm->info.penult_phn_wid) < 0)
00296 hmm->info.penult_phn_wid = w;
00297 else {
00298 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00299 ngs->homophone_set[j] = w;
00300 }
00301 }
00302 }
00303
00304 ngs->n_1ph_words = ngs->n_1ph_LMwords;
00305
00306
00307 for (w = 0; w < n_words; ++w) {
00308
00309 if (!dict_is_single_phone(dict, w))
00310 continue;
00311
00312 if (dict_real_word(dict, w))
00313 continue;
00314 if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
00315 continue;
00316 E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00317 ngs->n_1ph_words, dict_wordstr(dict, w)));
00318 ngs->single_phone_wid[ngs->n_1ph_words++] = w;
00319 }
00320
00321 if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
00322
00323 ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
00324 E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
00325
00326
00327 if (ngs->active_chan_list)
00328 ckd_free_2d(ngs->active_chan_list);
00329 ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
00330 sizeof(**ngs->active_chan_list));
00331 }
00332
00333 E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
00334 ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00335 }
00336
00337 static void
00338 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
00339 {
00340 chan_t *child, *sibling;
00341
00342
00343 for (child = hmm->next; child; child = sibling) {
00344 sibling = child->alt;
00345 reinit_search_subtree(ngs, child);
00346 }
00347
00348
00349 hmm_deinit(&hmm->hmm);
00350 listelem_free(ngs->chan_alloc, hmm);
00351 }
00352
00353
00354
00355
00356
00357 static void
00358 reinit_search_tree(ngram_search_t *ngs)
00359 {
00360 int32 i;
00361 chan_t *hmm, *sibling;
00362
00363 for (i = 0; i < ngs->n_root_chan; i++) {
00364 hmm = ngs->root_chan[i].next;
00365
00366 while (hmm) {
00367 sibling = hmm->alt;
00368 reinit_search_subtree(ngs, hmm);
00369 hmm = sibling;
00370 }
00371
00372 ngs->root_chan[i].penult_phn_wid = -1;
00373 ngs->root_chan[i].next = NULL;
00374 }
00375 ngs->n_nonroot_chan = 0;
00376 }
00377
00378 void
00379 ngram_fwdtree_init(ngram_search_t *ngs)
00380 {
00381
00382 ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
00383 sizeof(*ngs->bestbp_rc));
00384 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00385 sizeof(*ngs->lastphn_cand));
00386 init_search_tree(ngs);
00387 create_search_tree(ngs);
00388 }
00389
00390 static void
00391 deinit_search_tree(ngram_search_t *ngs)
00392 {
00393 int i, w, n_words;
00394
00395 n_words = ps_search_n_words(ngs);
00396 for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00397 hmm_deinit(&ngs->root_chan[i].hmm);
00398 }
00399 if (ngs->rhmm_1ph) {
00400 for (i = w = 0; w < n_words; ++w) {
00401 if (!dict_is_single_phone(ps_search_dict(ngs), w))
00402 continue;
00403 hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00404 ++i;
00405 }
00406 ckd_free(ngs->rhmm_1ph);
00407 ngs->rhmm_1ph = NULL;
00408 }
00409 ngs->n_root_chan = 0;
00410 ngs->n_root_chan_alloc = 0;
00411 ckd_free(ngs->root_chan);
00412 ngs->root_chan = NULL;
00413 ckd_free(ngs->single_phone_wid);
00414 ngs->single_phone_wid = NULL;
00415 ckd_free(ngs->homophone_set);
00416 ngs->homophone_set = NULL;
00417 }
00418
00419 void
00420 ngram_fwdtree_deinit(ngram_search_t *ngs)
00421 {
00422
00423 reinit_search_tree(ngs);
00424
00425 deinit_search_tree(ngs);
00426
00427 ngs->max_nonroot_chan = 0;
00428 ckd_free_2d(ngs->active_chan_list);
00429 ngs->active_chan_list = NULL;
00430 ckd_free(ngs->cand_sf);
00431 ngs->cand_sf = NULL;
00432 ckd_free(ngs->bestbp_rc);
00433 ngs->bestbp_rc = NULL;
00434 ckd_free(ngs->lastphn_cand);
00435 ngs->lastphn_cand = NULL;
00436 }
00437
00438 int
00439 ngram_fwdtree_reinit(ngram_search_t *ngs)
00440 {
00441
00442 reinit_search_tree(ngs);
00443
00444 deinit_search_tree(ngs);
00445
00446 ckd_free(ngs->lastphn_cand);
00447 ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00448 sizeof(*ngs->lastphn_cand));
00449 ckd_free(ngs->word_chan);
00450 ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
00451 sizeof(*ngs->word_chan));
00452
00453 init_search_tree(ngs);
00454 create_search_tree(ngs);
00455 return 0;
00456 }
00457
00458 void
00459 ngram_fwdtree_start(ngram_search_t *ngs)
00460 {
00461 ps_search_t *base = (ps_search_t *)ngs;
00462 int32 i, w, n_words;
00463 root_chan_t *rhmm;
00464
00465 n_words = ps_search_n_words(ngs);
00466
00467
00468 memset(&ngs->st, 0, sizeof(ngs->st));
00469
00470
00471 ngs->bpidx = 0;
00472 ngs->bss_head = 0;
00473
00474
00475 for (i = 0; i < n_words; ++i)
00476 ngs->word_lat_idx[i] = NO_BP;
00477
00478
00479 ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
00480 ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
00481
00482
00483 ngs->best_score = 0;
00484 ngs->renormalized = 0;
00485
00486
00487 for (i = 0; i < n_words; i++)
00488 ngs->last_ltrans[i].sf = -1;
00489 ngs->n_frame = 0;
00490
00491
00492 ckd_free(base->hyp_str);
00493 base->hyp_str = NULL;
00494
00495
00496
00497 for (i = 0; i < ngs->n_1ph_words; i++) {
00498 w = ngs->single_phone_wid[i];
00499 rhmm = (root_chan_t *) ngs->word_chan[w];
00500 hmm_clear(&rhmm->hmm);
00501 }
00502
00503
00504 rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
00505 hmm_clear(&rhmm->hmm);
00506 hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00507 }
00508
00509
00510
00511
00512
00513 static void
00514 compute_sen_active(ngram_search_t *ngs, int frame_idx)
00515 {
00516 root_chan_t *rhmm;
00517 chan_t *hmm, **acl;
00518 int32 i, w, *awl;
00519
00520 acmod_clear_active(ps_search_acmod(ngs));
00521
00522
00523 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00524 if (hmm_frame(&rhmm->hmm) == frame_idx)
00525 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00526 }
00527
00528
00529 i = ngs->n_active_chan[frame_idx & 0x1];
00530 acl = ngs->active_chan_list[frame_idx & 0x1];
00531 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00532 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00533 }
00534
00535
00536 i = ngs->n_active_word[frame_idx & 0x1];
00537 awl = ngs->active_word_list[frame_idx & 0x1];
00538 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00539 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00540 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00541 }
00542 }
00543 for (i = 0; i < ngs->n_1ph_words; i++) {
00544 w = ngs->single_phone_wid[i];
00545 rhmm = (root_chan_t *) ngs->word_chan[w];
00546
00547 if (hmm_frame(&rhmm->hmm) == frame_idx)
00548 acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00549 }
00550 }
00551
00552 static void
00553 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00554 {
00555 root_chan_t *rhmm;
00556 chan_t *hmm, **acl;
00557 int32 i, w, *awl;
00558
00559
00560 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00561 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00562 hmm_normalize(&rhmm->hmm, norm);
00563 }
00564 }
00565
00566
00567 i = ngs->n_active_chan[frame_idx & 0x1];
00568 acl = ngs->active_chan_list[frame_idx & 0x1];
00569 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00570 hmm_normalize(&hmm->hmm, norm);
00571 }
00572
00573
00574 i = ngs->n_active_word[frame_idx & 0x1];
00575 awl = ngs->active_word_list[frame_idx & 0x1];
00576 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00577 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00578 hmm_normalize(&hmm->hmm, norm);
00579 }
00580 }
00581 for (i = 0; i < ngs->n_1ph_words; i++) {
00582 w = ngs->single_phone_wid[i];
00583 rhmm = (root_chan_t *) ngs->word_chan[w];
00584 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00585 hmm_normalize(&rhmm->hmm, norm);
00586 }
00587 }
00588
00589 ngs->renormalized = TRUE;
00590 }
00591
00592 static int32
00593 eval_root_chan(ngram_search_t *ngs, int frame_idx)
00594 {
00595 root_chan_t *rhmm;
00596 int32 i, bestscore;
00597
00598 bestscore = WORST_SCORE;
00599 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00600 if (hmm_frame(&rhmm->hmm) == frame_idx) {
00601 int32 score = chan_v_eval(rhmm);
00602 if (score BETTER_THAN bestscore)
00603 bestscore = score;
00604 ++ngs->st.n_root_chan_eval;
00605 }
00606 }
00607 return (bestscore);
00608 }
00609
00610 static int32
00611 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00612 {
00613 chan_t *hmm, **acl;
00614 int32 i, bestscore;
00615
00616 i = ngs->n_active_chan[frame_idx & 0x1];
00617 acl = ngs->active_chan_list[frame_idx & 0x1];
00618 bestscore = WORST_SCORE;
00619 ngs->st.n_nonroot_chan_eval += i;
00620
00621 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00622 int32 score = chan_v_eval(hmm);
00623 assert(hmm_frame(&hmm->hmm) == frame_idx);
00624 if (score BETTER_THAN bestscore)
00625 bestscore = score;
00626 }
00627
00628 return bestscore;
00629 }
00630
00631 static int32
00632 eval_word_chan(ngram_search_t *ngs, int frame_idx)
00633 {
00634 root_chan_t *rhmm;
00635 chan_t *hmm;
00636 int32 i, w, bestscore, *awl, j, k;
00637
00638 k = 0;
00639 bestscore = WORST_SCORE;
00640 awl = ngs->active_word_list[frame_idx & 0x1];
00641
00642 i = ngs->n_active_word[frame_idx & 0x1];
00643 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00644 assert(bitvec_is_set(ngs->word_active, w));
00645 bitvec_clear(ngs->word_active, w);
00646 assert(ngs->word_chan[w] != NULL);
00647
00648 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00649 int32 score;
00650
00651 assert(hmm_frame(&hmm->hmm) == frame_idx);
00652 score = chan_v_eval(hmm);
00653
00654
00655 if (score BETTER_THAN bestscore)
00656 bestscore = score;
00657
00658 k++;
00659 }
00660 }
00661
00662
00663 j = 0;
00664 for (i = 0; i < ngs->n_1ph_words; i++) {
00665 int32 score;
00666
00667 w = ngs->single_phone_wid[i];
00668 rhmm = (root_chan_t *) ngs->word_chan[w];
00669 if (hmm_frame(&rhmm->hmm) < frame_idx)
00670 continue;
00671
00672 score = chan_v_eval(rhmm);
00673
00674 if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
00675 bestscore = score;
00676
00677 j++;
00678 }
00679
00680 ngs->st.n_last_chan_eval += k + j;
00681 ngs->st.n_nonroot_chan_eval += k + j;
00682 ngs->st.n_word_lastchan_eval +=
00683 ngs->n_active_word[frame_idx & 0x1] + j;
00684
00685 return bestscore;
00686 }
00687
00688 static int32
00689 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
00690 {
00691 int32 bs;
00692
00693 hmm_context_set_senscore(ngs->hmmctx, senone_scores);
00694 ngs->best_score = eval_root_chan(ngs, frame_idx);
00695 if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00696 ngs->best_score = bs;
00697 if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00698 ngs->best_score = bs;
00699 ngs->last_phone_best_score = bs;
00700
00701 return ngs->best_score;
00702 }
00703
00704
00705
00706
00707
00708
00709 static void
00710 prune_root_chan(ngram_search_t *ngs, int frame_idx)
00711 {
00712 root_chan_t *rhmm;
00713 chan_t *hmm;
00714 int32 i, nf, w;
00715 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00716 chan_t **nacl;
00717 lastphn_cand_t *candp;
00718 phone_loop_search_t *pls;
00719
00720 nf = frame_idx + 1;
00721 thresh = ngs->best_score + ngs->dynamic_beam;
00722 newphone_thresh = ngs->best_score + ngs->pbeam;
00723 lastphn_thresh = ngs->best_score + ngs->lpbeam;
00724 nacl = ngs->active_chan_list[nf & 0x1];
00725 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00726
00727 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
00728 E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
00729 i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
00730
00731 if (hmm_frame(&rhmm->hmm) < frame_idx)
00732 continue;
00733
00734 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00735 hmm_frame(&rhmm->hmm) = nf;
00736 E_DEBUG(3, ("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
00737
00738
00739 newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
00740 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
00741 for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
00742 int32 pl_newphone_score = newphone_score
00743 + phone_loop_search_score(pls, hmm->ciphone);
00744 if (pl_newphone_score BETTER_THAN newphone_thresh) {
00745 if ((hmm_frame(&hmm->hmm) < frame_idx)
00746 || (pl_newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00747 hmm_enter(&hmm->hmm, pl_newphone_score,
00748 hmm_out_history(&rhmm->hmm), nf);
00749 *(nacl++) = hmm;
00750 }
00751 }
00752 }
00753 }
00754
00755
00756
00757
00758
00759
00760 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
00761 for (w = rhmm->penult_phn_wid; w >= 0;
00762 w = ngs->homophone_set[w]) {
00763 int32 pl_newphone_score = newphone_score
00764 + phone_loop_search_score
00765 (pls, dict_last_phone(ps_search_dict(ngs),w));
00766 E_DEBUG(3, ("wid %d newphone_score %d\n", w, pl_newphone_score));
00767 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
00768 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00769 ngs->n_lastphn_cand++;
00770 candp->wid = w;
00771 candp->score =
00772 pl_newphone_score - ngs->nwpen;
00773 candp->bp = hmm_out_history(&rhmm->hmm);
00774 }
00775 }
00776 }
00777 }
00778 }
00779 ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00780 }
00781
00782
00783
00784
00785
00786 static void
00787 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00788 {
00789 chan_t *hmm, *nexthmm;
00790 int32 nf, w, i;
00791 int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00792 chan_t **acl, **nacl;
00793 lastphn_cand_t *candp;
00794 phone_loop_search_t *pls;
00795
00796 nf = frame_idx + 1;
00797
00798 thresh = ngs->best_score + ngs->dynamic_beam;
00799 newphone_thresh = ngs->best_score + ngs->pbeam;
00800 lastphn_thresh = ngs->best_score + ngs->lpbeam;
00801 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00802
00803 acl = ngs->active_chan_list[frame_idx & 0x1];
00804 nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
00805
00806 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
00807 --i, hmm = *(acl++)) {
00808 assert(hmm_frame(&hmm->hmm) >= frame_idx);
00809
00810 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00811
00812 if (hmm_frame(&hmm->hmm) != nf) {
00813 hmm_frame(&hmm->hmm) = nf;
00814 *(nacl++) = hmm;
00815 }
00816
00817
00818 newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
00819 if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
00820 for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
00821 int32 pl_newphone_score = newphone_score
00822 + phone_loop_search_score(pls, nexthmm->ciphone);
00823 if ((pl_newphone_score BETTER_THAN newphone_thresh)
00824 && ((hmm_frame(&nexthmm->hmm) < frame_idx)
00825 || (pl_newphone_score
00826 BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
00827 if (hmm_frame(&nexthmm->hmm) != nf) {
00828
00829 *(nacl++) = nexthmm;
00830 }
00831 hmm_enter(&nexthmm->hmm, pl_newphone_score,
00832 hmm_out_history(&hmm->hmm), nf);
00833 }
00834 }
00835 }
00836
00837
00838
00839
00840
00841
00842 if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
00843 for (w = hmm->info.penult_phn_wid; w >= 0;
00844 w = ngs->homophone_set[w]) {
00845 int32 pl_newphone_score = newphone_score
00846 + phone_loop_search_score
00847 (pls, dict_last_phone(ps_search_dict(ngs),w));
00848 if (pl_newphone_score BETTER_THAN lastphn_thresh) {
00849 candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00850 ngs->n_lastphn_cand++;
00851 candp->wid = w;
00852 candp->score =
00853 pl_newphone_score - ngs->nwpen;
00854 candp->bp = hmm_out_history(&hmm->hmm);
00855 }
00856 }
00857 }
00858 }
00859 else if (hmm_frame(&hmm->hmm) != nf) {
00860 hmm_clear_scores(&hmm->hmm);
00861 }
00862 }
00863 ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00864 }
00865
00866
00867
00868
00869
00870
00871 static void
00872 last_phone_transition(ngram_search_t *ngs, int frame_idx)
00873 {
00874 int32 i, j, k, nf, bp, bplast, w;
00875 lastphn_cand_t *candp;
00876 int32 *nawl;
00877 int32 thresh;
00878 int32 bestscore, dscr;
00879 chan_t *hmm;
00880 bptbl_t *bpe;
00881 int32 n_cand_sf = 0;
00882
00883 nf = frame_idx + 1;
00884 nawl = ngs->active_word_list[nf & 0x1];
00885 ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
00886
00887
00888
00889 E_DEBUG(3, ("n_lastphn_cand %d\n", ngs->n_lastphn_cand));
00890 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00891
00892 if (candp->bp == -1)
00893 continue;
00894
00895 bpe = &(ngs->bp_table[candp->bp]);
00896
00897
00898 candp->score -=
00899 ngram_search_exit_score
00900 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
00901 E_DEBUG(4, ("candp->score %d\n", candp->score));
00902
00903
00904
00905
00906
00907
00908
00909 if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
00910
00911
00912 for (j = 0; j < n_cand_sf; j++) {
00913 if (ngs->cand_sf[j].bp_ef == bpe->frame)
00914 break;
00915 }
00916
00917 if (j < n_cand_sf)
00918 candp->next = ngs->cand_sf[j].cand;
00919 else {
00920
00921 if (n_cand_sf >= ngs->cand_sf_alloc) {
00922 if (ngs->cand_sf_alloc == 0) {
00923 ngs->cand_sf =
00924 ckd_calloc(CAND_SF_ALLOCSIZE,
00925 sizeof(*ngs->cand_sf));
00926 ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
00927 }
00928 else {
00929 ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
00930 ngs->cand_sf = ckd_realloc(ngs->cand_sf,
00931 ngs->cand_sf_alloc
00932 * sizeof(*ngs->cand_sf));
00933 E_INFO("cand_sf[] increased to %d entries\n",
00934 ngs->cand_sf_alloc);
00935 }
00936 }
00937
00938
00939 j = n_cand_sf++;
00940 candp->next = -1;
00941 ngs->cand_sf[j].bp_ef = bpe->frame;
00942 }
00943
00944 ngs->cand_sf[j].cand = i;
00945
00946 ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
00947 ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
00948 }
00949 }
00950
00951
00952 for (i = 0; i < n_cand_sf; i++) {
00953
00954 bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
00955 bplast = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1] - 1;
00956
00957 for (bpe = &(ngs->bp_table[bp]); bp <= bplast; bp++, bpe++) {
00958 if (!bpe->valid)
00959 continue;
00960
00961 for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
00962 int32 n_used;
00963 candp = &(ngs->lastphn_cand[j]);
00964 dscr =
00965 ngram_search_exit_score
00966 (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
00967 dscr += ngram_tg_score(ngs->lmset,
00968 dict_basewid(ps_search_dict(ngs), candp->wid),
00969 bpe->real_wid,
00970 bpe->prev_real_wid, &n_used);
00971
00972 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
00973 ngs->last_ltrans[candp->wid].dscr = dscr;
00974 ngs->last_ltrans[candp->wid].bp = bp;
00975 }
00976 }
00977 }
00978 }
00979
00980
00981 bestscore = ngs->last_phone_best_score;
00982 for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00983 candp->score += ngs->last_ltrans[candp->wid].dscr;
00984 candp->bp = ngs->last_ltrans[candp->wid].bp;
00985
00986 if (candp->score BETTER_THAN bestscore)
00987 bestscore = candp->score;
00988 }
00989 ngs->last_phone_best_score = bestscore;
00990
00991
00992 thresh = bestscore + ngs->lponlybeam;
00993 for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
00994 if (candp->score BETTER_THAN thresh) {
00995 w = candp->wid;
00996
00997 ngram_search_alloc_all_rc(ngs, w);
00998
00999 k = 0;
01000 for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
01001 if ((hmm_frame(&hmm->hmm) < frame_idx)
01002 || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
01003 assert(hmm_frame(&hmm->hmm) != nf);
01004 hmm_enter(&hmm->hmm,
01005 candp->score, candp->bp, nf);
01006 k++;
01007 }
01008 }
01009 if (k > 0) {
01010 assert(bitvec_is_clear(ngs->word_active, w));
01011 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
01012 *(nawl++) = w;
01013 bitvec_set(ngs->word_active, w);
01014 }
01015 }
01016 }
01017 ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01018 }
01019
01020
01021
01022
01023
01024 static void
01025 prune_word_chan(ngram_search_t *ngs, int frame_idx)
01026 {
01027 root_chan_t *rhmm;
01028 chan_t *hmm, *thmm;
01029 chan_t **phmmp;
01030 int32 nf, w, i, k;
01031 int32 newword_thresh, lastphn_thresh;
01032 int32 *awl, *nawl;
01033
01034 nf = frame_idx + 1;
01035 newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
01036 lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
01037
01038 awl = ngs->active_word_list[frame_idx & 0x1];
01039 nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
01040
01041
01042 for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
01043 --i, w = *(awl++)) {
01044 k = 0;
01045 phmmp = &(ngs->word_chan[w]);
01046 for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
01047 assert(hmm_frame(&hmm->hmm) >= frame_idx);
01048
01049 thmm = hmm->next;
01050 if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
01051
01052 hmm_frame(&hmm->hmm) = nf;
01053 k++;
01054 phmmp = &(hmm->next);
01055
01056
01057 if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
01058
01059 ngram_search_save_bp(ngs, frame_idx, w,
01060 hmm_out_score(&hmm->hmm),
01061 hmm_out_history(&hmm->hmm),
01062 hmm->info.rc_id);
01063 }
01064 }
01065 else if (hmm_frame(&hmm->hmm) == nf) {
01066 phmmp = &(hmm->next);
01067 }
01068 else {
01069 hmm_deinit(&hmm->hmm);
01070 listelem_free(ngs->chan_alloc, hmm);
01071 *phmmp = thmm;
01072 }
01073 }
01074 if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
01075 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
01076 *(nawl++) = w;
01077 bitvec_set(ngs->word_active, w);
01078 }
01079 }
01080 ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01081
01082
01083
01084
01085
01086 for (i = 0; i < ngs->n_1ph_words; i++) {
01087 w = ngs->single_phone_wid[i];
01088 rhmm = (root_chan_t *) ngs->word_chan[w];
01089 E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
01090 dict_wordstr(ps_search_dict(ngs),w),
01091 hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
01092 lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
01093 if (hmm_frame(&rhmm->hmm) < frame_idx)
01094 continue;
01095 if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
01096 hmm_frame(&rhmm->hmm) = nf;
01097
01098
01099 if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
01100 E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
01101 dict_wordstr(ps_search_dict(ngs),w),
01102 hmm_out_score(&rhmm->hmm),
01103 lastphn_thresh, newword_thresh));
01104 ngram_search_save_bp(ngs, frame_idx, w,
01105 hmm_out_score(&rhmm->hmm),
01106 hmm_out_history(&rhmm->hmm), 0);
01107 }
01108 }
01109 }
01110 }
01111
01112 static void
01113 prune_channels(ngram_search_t *ngs, int frame_idx)
01114 {
01115
01116 ngs->n_lastphn_cand = 0;
01117
01118 ngs->dynamic_beam = ngs->beam;
01119 if (ngs->maxhmmpf != -1
01120 && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
01121
01122 int32 bins[256], bw, nhmms, i;
01123 root_chan_t *rhmm;
01124 chan_t **acl, *hmm;
01125
01126
01127 bw = -ngs->beam / 256;
01128 memset(bins, 0, sizeof(bins));
01129
01130 for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
01131 int32 b;
01132
01133
01134 b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
01135 if (b >= 256)
01136 b = 255;
01137 ++bins[b];
01138 }
01139
01140 acl = ngs->active_chan_list[frame_idx & 0x1];
01141 for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
01142 i > 0; --i, hmm = *(acl++)) {
01143 int32 b;
01144
01145
01146 b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
01147 if (b >= 256)
01148 b = 255;
01149 ++bins[b];
01150 }
01151
01152 for (i = nhmms = 0; i < 256; ++i) {
01153 nhmms += bins[i];
01154 if (nhmms > ngs->maxhmmpf)
01155 break;
01156 }
01157 ngs->dynamic_beam = -(i * bw);
01158 }
01159
01160 prune_root_chan(ngs, frame_idx);
01161 prune_nonroot_chan(ngs, frame_idx);
01162 last_phone_transition(ngs, frame_idx);
01163 prune_word_chan(ngs, frame_idx);
01164 }
01165
01166
01167
01168
01169
01170 static void
01171 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
01172 {
01173 int32 bp, n;
01174 int32 bestscr, worstscr;
01175 bptbl_t *bpe, *bestbpe, *worstbpe;
01176
01177
01178 if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
01179 return;
01180
01181
01182 bestscr = (int32) 0x80000000;
01183 bestbpe = NULL;
01184 n = 0;
01185 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01186 bpe = &(ngs->bp_table[bp]);
01187 if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
01188 if (bpe->score BETTER_THAN bestscr) {
01189 bestscr = bpe->score;
01190 bestbpe = bpe;
01191 }
01192 bpe->valid = FALSE;
01193 n++;
01194 }
01195 }
01196
01197 if (bestbpe != NULL) {
01198 bestbpe->valid = TRUE;
01199 --n;
01200 }
01201
01202
01203 n = (ngs->bpidx
01204 - ngs->bp_table_idx[frame_idx]) - n;
01205 for (; n > ngs->maxwpf; --n) {
01206
01207 worstscr = (int32) 0x7fffffff;
01208 worstbpe = NULL;
01209 for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
01210 bpe = &(ngs->bp_table[bp]);
01211 if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
01212 worstscr = bpe->score;
01213 worstbpe = bpe;
01214 }
01215 }
01216
01217 if (worstbpe == NULL)
01218 E_FATAL("PANIC: No worst BPtable entry remaining\n");
01219 worstbpe->valid = 0;
01220 }
01221 }
01222
01223 static void
01224 word_transition(ngram_search_t *ngs, int frame_idx)
01225 {
01226 int32 i, k, bp, w, nf;
01227 int32 rc;
01228 int32 *rcss;
01229 int32 thresh, newscore;
01230 bptbl_t *bpe;
01231 root_chan_t *rhmm;
01232 struct bestbp_rc_s *bestbp_rc_ptr;
01233 phone_loop_search_t *pls;
01234 dict_t *dict = ps_search_dict(ngs);
01235 dict2pid_t *d2p = ps_search_dict2pid(ngs);
01236
01237
01238
01239
01240
01241
01242 for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
01243 ngs->bestbp_rc[i].score = WORST_SCORE;
01244 k = 0;
01245 pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
01246
01247
01248 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01249 bpe = &(ngs->bp_table[bp]);
01250 ngs->word_lat_idx[bpe->wid] = NO_BP;
01251
01252 if (bpe->wid == ps_search_finish_wid(ngs))
01253 continue;
01254 k++;
01255
01256
01257
01258
01259
01260 rcss = &(ngs->bscore_stack[bpe->s_idx]);
01261 if (bpe->last2_phone == -1) {
01262
01263 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01264 if (rcss[0] BETTER_THAN ngs->bestbp_rc[rc].score) {
01265 E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n", rcss[0], bpe->last_phone));
01266 ngs->bestbp_rc[rc].score = rcss[0];
01267 ngs->bestbp_rc[rc].path = bp;
01268 ngs->bestbp_rc[rc].lc = bpe->last_phone;
01269 }
01270 }
01271 }
01272 else {
01273 xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
01274 for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01275 if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
01276 E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
01277 rc, rcss[rssid->cimap[rc]], bpe->last_phone));
01278 ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
01279 ngs->bestbp_rc[rc].path = bp;
01280 ngs->bestbp_rc[rc].lc = bpe->last_phone;
01281 }
01282 }
01283 }
01284 }
01285 if (k == 0)
01286 return;
01287
01288 nf = frame_idx + 1;
01289 thresh = ngs->best_score + ngs->dynamic_beam;
01290
01291
01292
01293
01294 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01295 bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
01296
01297 newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
01298 + phone_loop_search_score(pls, rhmm->ciphone);
01299 if (newscore BETTER_THAN thresh) {
01300 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01301 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01302 hmm_enter(&rhmm->hmm, newscore,
01303 bestbp_rc_ptr->path, nf);
01304
01305
01306 hmm_mpx_ssid(&rhmm->hmm, 0) =
01307 dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
01308 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01309 }
01310 }
01311 }
01312
01313
01314
01315
01316
01317 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01318 w = ngs->single_phone_wid[i];
01319 ngs->last_ltrans[w].dscr = (int32) 0x80000000;
01320 }
01321 for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01322 bpe = &(ngs->bp_table[bp]);
01323 if (!bpe->valid)
01324 continue;
01325
01326 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01327 int32 n_used;
01328 w = ngs->single_phone_wid[i];
01329 newscore = ngram_search_exit_score
01330 (ngs, bpe, dict_first_phone(dict, w));
01331 E_DEBUG(4, ("initial newscore for %s: %d\n",
01332 dict_wordstr(dict, w), newscore));
01333 newscore += ngram_tg_score(ngs->lmset,
01334 dict_basewid(dict, w),
01335 bpe->real_wid,
01336 bpe->prev_real_wid, &n_used);
01337
01338 if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
01339 ngs->last_ltrans[w].dscr = newscore;
01340 ngs->last_ltrans[w].bp = bp;
01341 }
01342 }
01343 }
01344
01345
01346 for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01347 w = ngs->single_phone_wid[i];
01348 rhmm = (root_chan_t *) ngs->word_chan[w];
01349 newscore = ngs->last_ltrans[w].dscr + ngs->pip
01350 + phone_loop_search_score(pls, rhmm->ciphone);
01351 if (newscore BETTER_THAN thresh) {
01352 bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
01353 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01354 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01355 hmm_enter(&rhmm->hmm,
01356 newscore, ngs->last_ltrans[w].bp, nf);
01357
01358
01359 hmm_mpx_ssid(&rhmm->hmm, 0) =
01360 dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
01361 dict_last_phone(dict, bpe->wid));
01362 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01363 }
01364 }
01365 }
01366
01367
01368 w = ps_search_silence_wid(ngs);
01369 rhmm = (root_chan_t *) ngs->word_chan[w];
01370 bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
01371 newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
01372 + phone_loop_search_score(pls, rhmm->ciphone);
01373 if (newscore BETTER_THAN thresh) {
01374 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01375 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01376 hmm_enter(&rhmm->hmm,
01377 newscore, bestbp_rc_ptr->path, nf);
01378 }
01379 }
01380 for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
01381 if (w == ps_search_silence_wid(ngs))
01382 continue;
01383 rhmm = (root_chan_t *) ngs->word_chan[w];
01384
01385 if (rhmm == NULL)
01386 continue;
01387 newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
01388 + phone_loop_search_score(pls, rhmm->ciphone);
01389 if (newscore BETTER_THAN thresh) {
01390 if ((hmm_frame(&rhmm->hmm) < frame_idx)
01391 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01392 hmm_enter(&rhmm->hmm,
01393 newscore, bestbp_rc_ptr->path, nf);
01394 }
01395 }
01396 }
01397 }
01398
01399 static void
01400 deactivate_channels(ngram_search_t *ngs, int frame_idx)
01401 {
01402 root_chan_t *rhmm;
01403 int i;
01404
01405
01406 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01407 if (hmm_frame(&rhmm->hmm) == frame_idx) {
01408 hmm_clear_scores(&rhmm->hmm);
01409 }
01410 }
01411
01412 for (i = 0; i < ngs->n_1ph_words; i++) {
01413 int32 w = ngs->single_phone_wid[i];
01414 rhmm = (root_chan_t *) ngs->word_chan[w];
01415 if (hmm_frame(&rhmm->hmm) == frame_idx) {
01416 hmm_clear_scores(&rhmm->hmm);
01417 }
01418 }
01419 }
01420
01421 int
01422 ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
01423 {
01424 int16 const *senscr;
01425
01426
01427 if (!ps_search_acmod(ngs)->compallsen)
01428 compute_sen_active(ngs, frame_idx);
01429
01430
01431 if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
01432 return 0;
01433 ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
01434
01435
01436 ngram_search_mark_bptable(ngs, frame_idx);
01437
01438
01439
01440 if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
01441 return 0;
01442
01443 if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
01444 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
01445 frame_idx, ngs->best_score);
01446 renormalize_scores(ngs, frame_idx, ngs->best_score);
01447 }
01448
01449
01450 evaluate_channels(ngs, senscr, frame_idx);
01451
01452 prune_channels(ngs, frame_idx);
01453
01454 bptable_maxwpf(ngs, frame_idx);
01455
01456 word_transition(ngs, frame_idx);
01457
01458 deactivate_channels(ngs, frame_idx);
01459
01460 ++ngs->n_frame;
01461
01462 return 1;
01463 }
01464
01465 void
01466 ngram_fwdtree_finish(ngram_search_t *ngs)
01467 {
01468 int32 i, w, cf, *awl;
01469 root_chan_t *rhmm;
01470 chan_t *hmm, **acl;
01471
01472
01473 cf = ps_search_acmod(ngs)->output_frame;
01474
01475 ngram_search_mark_bptable(ngs, cf);
01476
01477
01478
01479 for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01480 hmm_clear(&rhmm->hmm);
01481 }
01482
01483
01484 i = ngs->n_active_chan[cf & 0x1];
01485 acl = ngs->active_chan_list[cf & 0x1];
01486 for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
01487 hmm_clear(&hmm->hmm);
01488 }
01489
01490
01491 i = ngs->n_active_word[cf & 0x1];
01492 awl = ngs->active_word_list[cf & 0x1];
01493 for (w = *(awl++); i > 0; --i, w = *(awl++)) {
01494
01495 if (dict_is_single_phone(ps_search_dict(ngs), w))
01496 continue;
01497 bitvec_clear(ngs->word_active, w);
01498 if (ngs->word_chan[w] == NULL)
01499 continue;
01500 ngram_search_free_all_rc(ngs, w);
01501 }
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512 if (cf > 0) {
01513 E_INFO("%8d words recognized (%d/fr)\n",
01514 ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
01515 E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
01516 (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
01517 E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
01518 ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
01519 (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
01520 ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
01521 E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
01522 ngs->st.n_word_lastchan_eval,
01523 ngs->st.n_word_lastchan_eval / (cf + 1));
01524 E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
01525 ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
01526 }
01527 }