• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ngram_search_fwdtree.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050 
00051 /* Local headers. */
00052 #include "ngram_search_fwdtree.h"
00053 #include "phone_loop_search.h"
00054 
00055 /* Turn this on to dump channels for debugging */
00056 #define __CHAN_DUMP__           0
00057 #if __CHAN_DUMP__
00058 #define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
00059 #else
00060 #define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
00061 #endif
00062 
00063 /*
00064  * Allocate that part of the search channel tree structure that is independent of the
00065  * LM in use.
00066  */
00067 static void
00068 init_search_tree(ngram_search_t *ngs)
00069 {
00070     int32 w, ndiph, i, n_words, n_ci;
00071     dict_t *dict = ps_search_dict(ngs);
00072     bitvec_t *dimap;
00073 
00074     n_words = ps_search_n_words(ngs);
00075     ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set));
00076 
00077     /* Find #single phone words, and #unique first diphones (#root channels) in dict. */
00078     ndiph = 0;
00079     ngs->n_1ph_words = 0;
00080     n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef);
00081     /* Allocate a bitvector with flags for each possible diphone. */
00082     dimap = bitvec_alloc(n_ci * n_ci);
00083     for (w = 0; w < n_words; w++) {
00084         if (!dict_real_word(dict, w))
00085             continue;
00086         if (dict_is_single_phone(dict, w))
00087             ++ngs->n_1ph_words;
00088         else {
00089             int ph0, ph1;
00090             ph0 = dict_first_phone(dict, w);
00091             ph1 = dict_second_phone(dict, w);
00092             /* Increment ndiph the first time we see a diphone. */
00093             if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) {
00094                 bitvec_set(dimap, ph0 * n_ci + ph1);
00095                 ++ndiph;
00096             }
00097         }
00098     }
00099     E_INFO("%d unique initial diphones\n", ndiph);
00100     bitvec_free(dimap);
00101 
00102     /* Add remaining dict words (</s>, <s>, <sil>, noise words) to single-phone words */
00103     ngs->n_1ph_words += dict_num_fillers(dict) + 2;
00104     ngs->n_root_chan_alloc = ndiph + 1;
00105     /* Verify that these are all *actually* single-phone words,
00106      * otherwise really bad things will happen to us. */
00107     for (w = 0; w < n_words; ++w) {
00108         if (dict_real_word(dict, w))
00109             continue;
00110         if (!dict_is_single_phone(dict, w)) {
00111             E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n",
00112                    w, dict_wordstr(dict, w));
00113             --ngs->n_1ph_words;
00114         }
00115     }
00116 
00117     /* Allocate and initialize root channels */
00118     ngs->root_chan =
00119         ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan));
00120     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00121         hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1);
00122         ngs->root_chan[i].penult_phn_wid = -1;
00123         ngs->root_chan[i].next = NULL;
00124     }
00125 
00126     /* Permanently allocate and initialize channels for single-phone
00127      * words (1/word). */
00128     ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00129     i = 0;
00130     for (w = 0; w < n_words; w++) {
00131         if (!dict_is_single_phone(dict, w))
00132             continue;
00133         /* Use SIL as right context for these. */
00134         ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00135         ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
00136         hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00137                  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone),
00138                  ngs->rhmm_1ph[i].ciphone);
00139         ngs->rhmm_1ph[i].next = NULL;
00140 
00141         ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00142         i++;
00143     }
00144 
00145     ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words,
00146                                        sizeof(*ngs->single_phone_wid));
00147     E_INFO("%d root, %d non-root channels, %d single-phone words\n",
00148            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00149 }
00150 
00151 /*
00152  * One-time initialization of internal channels in HMM tree.
00153  */
00154 static void
00155 init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci)
00156 {
00157     hmm->next = NULL;
00158     hmm->alt = NULL;
00159     hmm->info.penult_phn_wid = -1;
00160     hmm->ciphone = ci;
00161     hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, ci);
00162 }
00163 
00164 /*
00165  * Allocate and initialize search channel-tree structure.
00166  * At this point, all the root-channels have been allocated and partly initialized
00167  * (as per init_search_tree()), and channels for all the single-phone words have been
00168  * allocated and initialized.  None of the interior channels of search-trees have
00169  * been allocated.
00170  * This routine may be called on every utterance, after reinit_search_tree() clears
00171  * the search tree created for the previous utterance.  Meant for reconfiguring the
00172  * search tree to suit the currently active LM.
00173  */
00174 static void
00175 create_search_tree(ngram_search_t *ngs)
00176 {
00177     chan_t *hmm;
00178     root_chan_t *rhmm;
00179     int32 w, i, j, p, ph;
00180     int32 n_words;
00181     dict_t *dict = ps_search_dict(ngs);
00182     dict2pid_t *d2p = ps_search_dict2pid(ngs);
00183 
00184     n_words = ps_search_n_words(ngs);
00185 
00186     E_INFO("Creating search tree\n");
00187 
00188     for (w = 0; w < n_words; w++)
00189         ngs->homophone_set[w] = -1;
00190 
00191     E_INFO("before: %d root, %d non-root channels, %d single-phone words\n",
00192            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00193 
00194     ngs->n_1ph_LMwords = 0;
00195     ngs->n_root_chan = 0;
00196     ngs->n_nonroot_chan = 0;
00197 
00198     for (w = 0; w < n_words; w++) {
00199         int ciphone, ci2phone;
00200 
00201         /* Ignore dictionary words not in LM */
00202         if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
00203             continue;
00204 
00205         /* Handle single-phone words individually; not in channel tree */
00206         if (dict_is_single_phone(dict, w)) {
00207             E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00208                        ngs->n_1ph_LMwords, dict_wordstr(dict, w)));
00209             ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w;
00210             continue;
00211         }
00212 
00213         /* Find a root channel matching the initial diphone, or
00214          * allocate one if not found. */
00215         ciphone = dict_first_phone(dict, w);
00216         ci2phone = dict_second_phone(dict, w);
00217         for (i = 0; i < ngs->n_root_chan; ++i) {
00218             if (ngs->root_chan[i].ciphone == ciphone
00219                 && ngs->root_chan[i].ci2phone == ci2phone)
00220                 break;
00221         }
00222         if (i == ngs->n_root_chan) {
00223             rhmm = &(ngs->root_chan[ngs->n_root_chan]);
00224             rhmm->hmm.tmatid = ciphone;
00225             /* Begin with CI phone?  Not sure this makes a difference... */
00226             hmm_mpx_ssid(&rhmm->hmm, 0) =
00227                 bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone);
00228             rhmm->ciphone = ciphone;
00229             rhmm->ci2phone = ci2phone;
00230             ngs->n_root_chan++;
00231         }
00232         else
00233             rhmm = &(ngs->root_chan[i]);
00234 
00235         E_DEBUG(2, ("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan));
00236         /* Now, rhmm = root channel for w.  Go on to remaining phones */
00237         if (dict_pronlen(dict, w) == 2) {
00238             /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */
00239             if ((j = rhmm->penult_phn_wid) < 0)
00240                 rhmm->penult_phn_wid = w;
00241             else {
00242                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00243                 ngs->homophone_set[j] = w;
00244             }
00245         }
00246         else {
00247             /* Add remaining phones, except the last, to tree */
00248             ph = dict2pid_internal(d2p, w, 1);
00249             hmm = rhmm->next;
00250             if (hmm == NULL) {
00251                 rhmm->next = hmm = listelem_malloc(ngs->chan_alloc);
00252                 init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1));
00253                 ngs->n_nonroot_chan++;
00254             }
00255             else {
00256                 chan_t *prev_hmm = NULL;
00257 
00258                 for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt)
00259                     prev_hmm = hmm;
00260                 if (!hmm) {     /* thanks, rkm! */
00261                     prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00262                     init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1));
00263                     ngs->n_nonroot_chan++;
00264                 }
00265             }
00266             E_DEBUG(3,("phone %s = %d\n",
00267                        bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00268                                             dict_second_phone(dict, w)), ph));
00269             for (p = 2; p < dict_pronlen(dict, w) - 1; p++) {
00270                 ph = dict2pid_internal(d2p, w, p);
00271                 if (!hmm->next) {
00272                     hmm->next = listelem_malloc(ngs->chan_alloc);
00273                     hmm = hmm->next;
00274                     init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p));
00275                     ngs->n_nonroot_chan++;
00276                 }
00277                 else {
00278                     chan_t *prev_hmm = NULL;
00279 
00280                     for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph);
00281                          hmm = hmm->alt)
00282                         prev_hmm = hmm;
00283                     if (!hmm) { /* thanks, rkm! */
00284                         prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc);
00285                         init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p));
00286                         ngs->n_nonroot_chan++;
00287                     }
00288                 }
00289                 E_DEBUG(3,("phone %s = %d\n",
00290                            bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef,
00291                                                 dict_pron(dict, w, p)), ph));
00292             }
00293 
00294             /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */
00295             if ((j = hmm->info.penult_phn_wid) < 0)
00296                 hmm->info.penult_phn_wid = w;
00297             else {
00298                 for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]);
00299                 ngs->homophone_set[j] = w;
00300             }
00301         }
00302     }
00303 
00304     ngs->n_1ph_words = ngs->n_1ph_LMwords;
00305 
00306     /* Add filler words to the array of 1ph words. */
00307     for (w = 0; w < n_words; ++w) {
00308         /* Skip anything that doesn't actually have a single phone. */
00309         if (!dict_is_single_phone(dict, w))
00310             continue;
00311         /* Also skip "real words" and things that are in the LM. */
00312         if (dict_real_word(dict, w))
00313             continue;
00314         if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w)))
00315             continue;
00316         E_DEBUG(1,("single_phone_wid[%d] = %s\n",
00317                    ngs->n_1ph_words, dict_wordstr(dict, w)));
00318         ngs->single_phone_wid[ngs->n_1ph_words++] = w;
00319     }
00320 
00321     if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) {
00322         /* Give some room for channels for new words added dynamically at run time */
00323         ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128;
00324         E_INFO("after: max nonroot chan increased to %d\n", ngs->max_nonroot_chan);
00325 
00326         /* Free old active channel list array if any and allocate new one */
00327         if (ngs->active_chan_list)
00328             ckd_free_2d(ngs->active_chan_list);
00329         ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan,
00330                                               sizeof(**ngs->active_chan_list));
00331     }
00332 
00333     E_INFO("after: %d root, %d non-root channels, %d single-phone words\n",
00334            ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words);
00335 }
00336 
00337 static void
00338 reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm)
00339 {
00340     chan_t *child, *sibling;
00341 
00342     /* First free all children under hmm */
00343     for (child = hmm->next; child; child = sibling) {
00344         sibling = child->alt;
00345         reinit_search_subtree(ngs, child);
00346     }
00347 
00348     /* Now free hmm */
00349     hmm_deinit(&hmm->hmm);
00350     listelem_free(ngs->chan_alloc, hmm);
00351 }
00352 
00353 /*
00354  * Delete search tree by freeing all interior channels within search tree and
00355  * restoring root channel state to the init state (i.e., just after init_search_tree()).
00356  */
00357 static void
00358 reinit_search_tree(ngram_search_t *ngs)
00359 {
00360     int32 i;
00361     chan_t *hmm, *sibling;
00362 
00363     for (i = 0; i < ngs->n_root_chan; i++) {
00364         hmm = ngs->root_chan[i].next;
00365 
00366         while (hmm) {
00367             sibling = hmm->alt;
00368             reinit_search_subtree(ngs, hmm);
00369             hmm = sibling;
00370         }
00371 
00372         ngs->root_chan[i].penult_phn_wid = -1;
00373         ngs->root_chan[i].next = NULL;
00374     }
00375     ngs->n_nonroot_chan = 0;
00376 }
00377 
00378 void
00379 ngram_fwdtree_init(ngram_search_t *ngs)
00380 {
00381     /* Allocate bestbp_rc, lastphn_cand, last_ltrans */
00382     ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef),
00383                                 sizeof(*ngs->bestbp_rc));
00384     ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00385                                    sizeof(*ngs->lastphn_cand));
00386     init_search_tree(ngs);
00387     create_search_tree(ngs);
00388 }
00389 
00390 static void
00391 deinit_search_tree(ngram_search_t *ngs)
00392 {
00393     int i, w, n_words;
00394 
00395     n_words = ps_search_n_words(ngs);
00396     for (i = 0; i < ngs->n_root_chan_alloc; i++) {
00397         hmm_deinit(&ngs->root_chan[i].hmm);
00398     }
00399     if (ngs->rhmm_1ph) {
00400         for (i = w = 0; w < n_words; ++w) {
00401             if (!dict_is_single_phone(ps_search_dict(ngs), w))
00402                 continue;
00403             hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00404             ++i;
00405         }
00406         ckd_free(ngs->rhmm_1ph);
00407         ngs->rhmm_1ph = NULL;
00408     }
00409     ngs->n_root_chan = 0;
00410     ngs->n_root_chan_alloc = 0;
00411     ckd_free(ngs->root_chan);
00412     ngs->root_chan = NULL;
00413     ckd_free(ngs->single_phone_wid);
00414     ngs->single_phone_wid = NULL;
00415     ckd_free(ngs->homophone_set);
00416     ngs->homophone_set = NULL;
00417 }
00418 
00419 void
00420 ngram_fwdtree_deinit(ngram_search_t *ngs)
00421 {
00422     /* Reset non-root channels. */
00423     reinit_search_tree(ngs);
00424     /* Free the search tree. */
00425     deinit_search_tree(ngs);
00426     /* Free other stuff. */
00427     ngs->max_nonroot_chan = 0;
00428     ckd_free_2d(ngs->active_chan_list);
00429     ngs->active_chan_list = NULL;
00430     ckd_free(ngs->cand_sf);
00431     ngs->cand_sf = NULL;
00432     ckd_free(ngs->bestbp_rc);
00433     ngs->bestbp_rc = NULL;
00434     ckd_free(ngs->lastphn_cand);
00435     ngs->lastphn_cand = NULL;
00436 }
00437 
00438 int
00439 ngram_fwdtree_reinit(ngram_search_t *ngs)
00440 {
00441     /* Reset non-root channels. */
00442     reinit_search_tree(ngs);
00443     /* Free the search tree. */
00444     deinit_search_tree(ngs);
00445     /* Reallocate things that depend on the number of words. */
00446     ckd_free(ngs->lastphn_cand);
00447     ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs),
00448                                    sizeof(*ngs->lastphn_cand));
00449     ckd_free(ngs->word_chan);
00450     ngs->word_chan = ckd_calloc(ps_search_n_words(ngs),
00451                                 sizeof(*ngs->word_chan));
00452     /* Rebuild the search tree. */
00453     init_search_tree(ngs);
00454     create_search_tree(ngs);
00455     return 0;
00456 }
00457 
00458 void
00459 ngram_fwdtree_start(ngram_search_t *ngs)
00460 {
00461     ps_search_t *base = (ps_search_t *)ngs;
00462     int32 i, w, n_words;
00463     root_chan_t *rhmm;
00464 
00465     n_words = ps_search_n_words(ngs);
00466 
00467     /* Reset utterance statistics. */
00468     memset(&ngs->st, 0, sizeof(ngs->st));
00469 
00470     /* Reset backpointer table. */
00471     ngs->bpidx = 0;
00472     ngs->bss_head = 0;
00473 
00474     /* Reset word lattice. */
00475     for (i = 0; i < n_words; ++i)
00476         ngs->word_lat_idx[i] = NO_BP;
00477 
00478     /* Reset active HMM and word lists. */
00479     ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0;
00480     ngs->n_active_word[0] = ngs->n_active_word[1] = 0;
00481 
00482     /* Reset scores. */
00483     ngs->best_score = 0;
00484     ngs->renormalized = 0;
00485 
00486     /* Reset other stuff. */
00487     for (i = 0; i < n_words; i++)
00488         ngs->last_ltrans[i].sf = -1;
00489     ngs->n_frame = 0;
00490 
00491     /* Clear the hypothesis string. */
00492     ckd_free(base->hyp_str);
00493     base->hyp_str = NULL;
00494 
00495     /* Reset the permanently allocated single-phone words, since they
00496      * may have junk left over in them from FWDFLAT. */
00497     for (i = 0; i < ngs->n_1ph_words; i++) {
00498         w = ngs->single_phone_wid[i];
00499         rhmm = (root_chan_t *) ngs->word_chan[w];
00500         hmm_clear(&rhmm->hmm);
00501     }
00502 
00503     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00504     rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))];
00505     hmm_clear(&rhmm->hmm);
00506     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00507 }
00508 
00509 /*
00510  * Mark the active senones for all senones belonging to channels that are active in the
00511  * current frame.
00512  */
00513 static void
00514 compute_sen_active(ngram_search_t *ngs, int frame_idx)
00515 {
00516     root_chan_t *rhmm;
00517     chan_t *hmm, **acl;
00518     int32 i, w, *awl;
00519 
00520     acmod_clear_active(ps_search_acmod(ngs));
00521 
00522     /* Flag active senones for root channels */
00523     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00524         if (hmm_frame(&rhmm->hmm) == frame_idx)
00525             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00526     }
00527 
00528     /* Flag active senones for nonroot channels in HMM tree */
00529     i = ngs->n_active_chan[frame_idx & 0x1];
00530     acl = ngs->active_chan_list[frame_idx & 0x1];
00531     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00532         acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00533     }
00534 
00535     /* Flag active senones for individual word channels */
00536     i = ngs->n_active_word[frame_idx & 0x1];
00537     awl = ngs->active_word_list[frame_idx & 0x1];
00538     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00539         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00540             acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00541         }
00542     }
00543     for (i = 0; i < ngs->n_1ph_words; i++) {
00544         w = ngs->single_phone_wid[i];
00545         rhmm = (root_chan_t *) ngs->word_chan[w];
00546 
00547         if (hmm_frame(&rhmm->hmm) == frame_idx)
00548             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00549     }
00550 }
00551 
00552 static void
00553 renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00554 {
00555     root_chan_t *rhmm;
00556     chan_t *hmm, **acl;
00557     int32 i, w, *awl;
00558 
00559     /* Renormalize root channels */
00560     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00561         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00562             hmm_normalize(&rhmm->hmm, norm);
00563         }
00564     }
00565 
00566     /* Renormalize nonroot channels in HMM tree */
00567     i = ngs->n_active_chan[frame_idx & 0x1];
00568     acl = ngs->active_chan_list[frame_idx & 0x1];
00569     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00570         hmm_normalize(&hmm->hmm, norm);
00571     }
00572 
00573     /* Renormalize individual word channels */
00574     i = ngs->n_active_word[frame_idx & 0x1];
00575     awl = ngs->active_word_list[frame_idx & 0x1];
00576     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00577         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00578             hmm_normalize(&hmm->hmm, norm);
00579         }
00580     }
00581     for (i = 0; i < ngs->n_1ph_words; i++) {
00582         w = ngs->single_phone_wid[i];
00583         rhmm = (root_chan_t *) ngs->word_chan[w];
00584         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00585             hmm_normalize(&rhmm->hmm, norm);
00586         }
00587     }
00588 
00589     ngs->renormalized = TRUE;
00590 }
00591 
00592 static int32
00593 eval_root_chan(ngram_search_t *ngs, int frame_idx)
00594 {
00595     root_chan_t *rhmm;
00596     int32 i, bestscore;
00597 
00598     bestscore = WORST_SCORE;
00599     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
00600         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00601             int32 score = chan_v_eval(rhmm);
00602             if (score BETTER_THAN bestscore)
00603                 bestscore = score;
00604             ++ngs->st.n_root_chan_eval;
00605         }
00606     }
00607     return (bestscore);
00608 }
00609 
00610 static int32
00611 eval_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00612 {
00613     chan_t *hmm, **acl;
00614     int32 i, bestscore;
00615 
00616     i = ngs->n_active_chan[frame_idx & 0x1];
00617     acl = ngs->active_chan_list[frame_idx & 0x1];
00618     bestscore = WORST_SCORE;
00619     ngs->st.n_nonroot_chan_eval += i;
00620 
00621     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
00622         int32 score = chan_v_eval(hmm);
00623         assert(hmm_frame(&hmm->hmm) == frame_idx);
00624         if (score BETTER_THAN bestscore)
00625             bestscore = score;
00626     }
00627 
00628     return bestscore;
00629 }
00630 
00631 static int32
00632 eval_word_chan(ngram_search_t *ngs, int frame_idx)
00633 {
00634     root_chan_t *rhmm;
00635     chan_t *hmm;
00636     int32 i, w, bestscore, *awl, j, k;
00637 
00638     k = 0;
00639     bestscore = WORST_SCORE;
00640     awl = ngs->active_word_list[frame_idx & 0x1];
00641 
00642     i = ngs->n_active_word[frame_idx & 0x1];
00643     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00644         assert(bitvec_is_set(ngs->word_active, w));
00645         bitvec_clear(ngs->word_active, w);
00646         assert(ngs->word_chan[w] != NULL);
00647 
00648         for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
00649             int32 score;
00650 
00651             assert(hmm_frame(&hmm->hmm) == frame_idx);
00652             score = chan_v_eval(hmm);
00653             /*printf("eval word chan %d score %d\n", w, score); */
00654 
00655             if (score BETTER_THAN bestscore)
00656                 bestscore = score;
00657 
00658             k++;
00659         }
00660     }
00661 
00662     /* Similarly for statically allocated single-phone words */
00663     j = 0;
00664     for (i = 0; i < ngs->n_1ph_words; i++) {
00665         int32 score;
00666 
00667         w = ngs->single_phone_wid[i];
00668         rhmm = (root_chan_t *) ngs->word_chan[w];
00669         if (hmm_frame(&rhmm->hmm) < frame_idx)
00670             continue;
00671 
00672         score = chan_v_eval(rhmm);
00673         /* printf("eval 1ph word chan %d score %d\n", w, score); */
00674         if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs))
00675             bestscore = score;
00676 
00677         j++;
00678     }
00679 
00680     ngs->st.n_last_chan_eval += k + j;
00681     ngs->st.n_nonroot_chan_eval += k + j;
00682     ngs->st.n_word_lastchan_eval +=
00683         ngs->n_active_word[frame_idx & 0x1] + j;
00684 
00685     return bestscore;
00686 }
00687 
00688 static int32
00689 evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx)
00690 {
00691     int32 bs;
00692 
00693     hmm_context_set_senscore(ngs->hmmctx, senone_scores);
00694     ngs->best_score = eval_root_chan(ngs, frame_idx);
00695     if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00696         ngs->best_score = bs;
00697     if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score)
00698         ngs->best_score = bs;
00699     ngs->last_phone_best_score = bs;
00700 
00701     return ngs->best_score;
00702 }
00703 
00704 /*
00705  * Prune currently active root channels for next frame.  Also, perform exit
00706  * transitions out of them and activate successors.
00707  * score[] of pruned root chan set to WORST_SCORE elsewhere.
00708  */
00709 static void
00710 prune_root_chan(ngram_search_t *ngs, int frame_idx)
00711 {
00712     root_chan_t *rhmm;
00713     chan_t *hmm;
00714     int32 i, nf, w;
00715     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00716     chan_t **nacl;              /* next active list */
00717     lastphn_cand_t *candp;
00718     phone_loop_search_t *pls;
00719 
00720     nf = frame_idx + 1;
00721     thresh = ngs->best_score + ngs->dynamic_beam;
00722     newphone_thresh = ngs->best_score + ngs->pbeam;
00723     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00724     nacl = ngs->active_chan_list[nf & 0x1];
00725     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00726 
00727     for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
00728         E_DEBUG(3,("Root channel %d frame %d score %d thresh %d\n",
00729                    i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh));
00730         /* First check if this channel was active in current frame */
00731         if (hmm_frame(&rhmm->hmm) < frame_idx)
00732             continue;
00733 
00734         if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00735             hmm_frame(&rhmm->hmm) = nf;  /* rhmm will be active in next frame */
00736             E_DEBUG(3, ("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)));
00737             /* transitions out of this root channel */
00738             /* transition to all next-level channels in the HMM tree */
00739             newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip;
00740             if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
00741                 for (hmm = rhmm->next; hmm; hmm = hmm->alt) {
00742                     int32 pl_newphone_score = newphone_score
00743                         + phone_loop_search_score(pls, hmm->ciphone);
00744                     if (pl_newphone_score BETTER_THAN newphone_thresh) {
00745                         if ((hmm_frame(&hmm->hmm) < frame_idx)
00746                             || (pl_newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) {
00747                             hmm_enter(&hmm->hmm, pl_newphone_score,
00748                                       hmm_out_history(&rhmm->hmm), nf);
00749                             *(nacl++) = hmm;
00750                         }
00751                     }
00752                 }
00753             }
00754 
00755             /*
00756              * Transition to last phone of all words for which this is the
00757              * penultimate phone (the last phones may need multiple right contexts).
00758              * Remember to remove the temporary newword_penalty.
00759              */
00760             if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
00761                 for (w = rhmm->penult_phn_wid; w >= 0;
00762                      w = ngs->homophone_set[w]) {
00763                     int32 pl_newphone_score = newphone_score
00764                         + phone_loop_search_score
00765                         (pls, dict_last_phone(ps_search_dict(ngs),w));
00766                     E_DEBUG(3, ("wid %d newphone_score %d\n", w, pl_newphone_score));
00767                     if (pl_newphone_score BETTER_THAN lastphn_thresh) {
00768                         candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00769                         ngs->n_lastphn_cand++;
00770                         candp->wid = w;
00771                         candp->score =
00772                             pl_newphone_score - ngs->nwpen;
00773                         candp->bp = hmm_out_history(&rhmm->hmm);
00774                     }
00775                 }
00776             }
00777         }
00778     }
00779     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00780 }
00781 
00782 /*
00783  * Prune currently active nonroot channels in HMM tree for next frame.  Also, perform
00784  * exit transitions out of such channels and activate successors.
00785  */
00786 static void
00787 prune_nonroot_chan(ngram_search_t *ngs, int frame_idx)
00788 {
00789     chan_t *hmm, *nexthmm;
00790     int32 nf, w, i;
00791     int32 thresh, newphone_thresh, lastphn_thresh, newphone_score;
00792     chan_t **acl, **nacl;       /* active list, next active list */
00793     lastphn_cand_t *candp;
00794     phone_loop_search_t *pls;
00795 
00796     nf = frame_idx + 1;
00797 
00798     thresh = ngs->best_score + ngs->dynamic_beam;
00799     newphone_thresh = ngs->best_score + ngs->pbeam;
00800     lastphn_thresh = ngs->best_score + ngs->lpbeam;
00801     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
00802 
00803     acl = ngs->active_chan_list[frame_idx & 0x1];   /* currently active HMMs in tree */
00804     nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1];
00805 
00806     for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0;
00807          --i, hmm = *(acl++)) {
00808         assert(hmm_frame(&hmm->hmm) >= frame_idx);
00809 
00810         if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00811             /* retain this channel in next frame */
00812             if (hmm_frame(&hmm->hmm) != nf) {
00813                 hmm_frame(&hmm->hmm) = nf;
00814                 *(nacl++) = hmm;
00815             }
00816 
00817             /* transition to all next-level channel in the HMM tree */
00818             newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip;
00819             if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) {
00820                 for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) {
00821                     int32 pl_newphone_score = newphone_score
00822                         + phone_loop_search_score(pls, nexthmm->ciphone);
00823                     if ((pl_newphone_score BETTER_THAN newphone_thresh)
00824                         && ((hmm_frame(&nexthmm->hmm) < frame_idx)
00825                             || (pl_newphone_score
00826                                 BETTER_THAN hmm_in_score(&nexthmm->hmm)))) {
00827                         if (hmm_frame(&nexthmm->hmm) != nf) {
00828                             /* Keep this HMM on the active list */
00829                             *(nacl++) = nexthmm;
00830                         }
00831                         hmm_enter(&nexthmm->hmm, pl_newphone_score,
00832                                   hmm_out_history(&hmm->hmm), nf);
00833                     }
00834                 }
00835             }
00836 
00837             /*
00838              * Transition to last phone of all words for which this is the
00839              * penultimate phone (the last phones may need multiple right contexts).
00840              * Remember to remove the temporary newword_penalty.
00841              */
00842             if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) {
00843                 for (w = hmm->info.penult_phn_wid; w >= 0;
00844                      w = ngs->homophone_set[w]) {
00845                     int32 pl_newphone_score = newphone_score
00846                         + phone_loop_search_score
00847                         (pls, dict_last_phone(ps_search_dict(ngs),w));
00848                     if (pl_newphone_score BETTER_THAN lastphn_thresh) {
00849                         candp = ngs->lastphn_cand + ngs->n_lastphn_cand;
00850                         ngs->n_lastphn_cand++;
00851                         candp->wid = w;
00852                         candp->score =
00853                             pl_newphone_score - ngs->nwpen;
00854                         candp->bp = hmm_out_history(&hmm->hmm);
00855                     }
00856                 }
00857             }
00858         }
00859         else if (hmm_frame(&hmm->hmm) != nf) {
00860             hmm_clear_scores(&hmm->hmm);
00861         }
00862     }
00863     ngs->n_active_chan[nf & 0x1] = nacl - ngs->active_chan_list[nf & 0x1];
00864 }
00865 
00866 /*
00867  * Execute the transition into the last phone for all candidates words emerging from
00868  * the HMM tree.  Attach LM scores to such transitions.
00869  * (Executed after pruning root and non-root, but before pruning word-chan.)
00870  */
00871 static void
00872 last_phone_transition(ngram_search_t *ngs, int frame_idx)
00873 {
00874     int32 i, j, k, nf, bp, bplast, w;
00875     lastphn_cand_t *candp;
00876     int32 *nawl;
00877     int32 thresh;
00878     int32 bestscore, dscr;
00879     chan_t *hmm;
00880     bptbl_t *bpe;
00881     int32 n_cand_sf = 0;
00882 
00883     nf = frame_idx + 1;
00884     nawl = ngs->active_word_list[nf & 0x1];
00885     ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand;
00886 
00887     /* For each candidate word (entering its last phone) */
00888     /* If best LM score and bp for candidate known use it, else sort cands by startfrm */
00889     E_DEBUG(3, ("n_lastphn_cand %d\n", ngs->n_lastphn_cand));
00890     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00891         /* This can happen if recognition fails. */
00892         if (candp->bp == -1)
00893             continue;
00894         /* Backpointer entry for it. */
00895         bpe = &(ngs->bp_table[candp->bp]);
00896 
00897         /* Subtract starting score for candidate, leave it with only word score */
00898         candp->score -=
00899             ngram_search_exit_score
00900             (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
00901         E_DEBUG(4, ("candp->score %d\n", candp->score));
00902 
00903         /*
00904          * If this candidate not occurred in an earlier frame, prepare for finding
00905          * best transition score into last phone; sort by start frame.
00906          */
00907         /* i.e. if we don't have an entry in last_ltrans for this
00908          * <word,sf>, then create one */
00909         if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) {
00910             /* Look for an entry in cand_sf matching the backpointer
00911              * for this candidate. */
00912             for (j = 0; j < n_cand_sf; j++) {
00913                 if (ngs->cand_sf[j].bp_ef == bpe->frame)
00914                     break;
00915             }
00916             /* Oh, we found one, so chain onto it. */
00917             if (j < n_cand_sf)
00918                 candp->next = ngs->cand_sf[j].cand;
00919             else {
00920                 /* Nope, let's make a new one, allocating cand_sf if necessary. */
00921                 if (n_cand_sf >= ngs->cand_sf_alloc) {
00922                     if (ngs->cand_sf_alloc == 0) {
00923                         ngs->cand_sf =
00924                             ckd_calloc(CAND_SF_ALLOCSIZE,
00925                                        sizeof(*ngs->cand_sf));
00926                         ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE;
00927                     }
00928                     else {
00929                         ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE;
00930                         ngs->cand_sf = ckd_realloc(ngs->cand_sf,
00931                                                    ngs->cand_sf_alloc
00932                                                    * sizeof(*ngs->cand_sf));
00933                         E_INFO("cand_sf[] increased to %d entries\n",
00934                                ngs->cand_sf_alloc);
00935                     }
00936                 }
00937 
00938                 /* Use the newly created cand_sf. */
00939                 j = n_cand_sf++;
00940                 candp->next = -1; /* End of the chain. */
00941                 ngs->cand_sf[j].bp_ef = bpe->frame;
00942             }
00943             /* Update it to point to this candidate. */
00944             ngs->cand_sf[j].cand = i;
00945 
00946             ngs->last_ltrans[candp->wid].dscr = WORST_SCORE;
00947             ngs->last_ltrans[candp->wid].sf = bpe->frame + 1;
00948         }
00949     }
00950 
00951     /* Compute best LM score and bp for new cands entered in the sorted lists above */
00952     for (i = 0; i < n_cand_sf; i++) {
00953         /* For the i-th unique end frame... */
00954         bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef];
00955         bplast = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1] - 1;
00956 
00957         for (bpe = &(ngs->bp_table[bp]); bp <= bplast; bp++, bpe++) {
00958             if (!bpe->valid)
00959                 continue;
00960             /* For each candidate at the start frame find bp->cand transition-score */
00961             for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) {
00962                 int32 n_used;
00963                 candp = &(ngs->lastphn_cand[j]);
00964                 dscr = 
00965                     ngram_search_exit_score
00966                     (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid));
00967                 dscr += ngram_tg_score(ngs->lmset,
00968                                        dict_basewid(ps_search_dict(ngs), candp->wid),
00969                                        bpe->real_wid,
00970                                        bpe->prev_real_wid, &n_used);
00971 
00972                 if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) {
00973                     ngs->last_ltrans[candp->wid].dscr = dscr;
00974                     ngs->last_ltrans[candp->wid].bp = bp;
00975                 }
00976             }
00977         }
00978     }
00979 
00980     /* Update best transitions for all candidates; also update best lastphone score */
00981     bestscore = ngs->last_phone_best_score;
00982     for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) {
00983         candp->score += ngs->last_ltrans[candp->wid].dscr;
00984         candp->bp = ngs->last_ltrans[candp->wid].bp;
00985 
00986         if (candp->score BETTER_THAN bestscore)
00987             bestscore = candp->score;
00988     }
00989     ngs->last_phone_best_score = bestscore;
00990 
00991     /* At this pt, we know the best entry score (with LM component) for all candidates */
00992     thresh = bestscore + ngs->lponlybeam;
00993     for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) {
00994         if (candp->score BETTER_THAN thresh) {
00995             w = candp->wid;
00996 
00997             ngram_search_alloc_all_rc(ngs, w);
00998 
00999             k = 0;
01000             for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) {
01001                 if ((hmm_frame(&hmm->hmm) < frame_idx)
01002                     || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) {
01003                     assert(hmm_frame(&hmm->hmm) != nf);
01004                     hmm_enter(&hmm->hmm,
01005                               candp->score, candp->bp, nf);
01006                     k++;
01007                 }
01008             }
01009             if (k > 0) {
01010                 assert(bitvec_is_clear(ngs->word_active, w));
01011                 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
01012                 *(nawl++) = w;
01013                 bitvec_set(ngs->word_active, w);
01014             }
01015         }
01016     }
01017     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01018 }
01019 
01020 /*
01021  * Prune currently active word channels for next frame.  Also, perform exit
01022  * transitions out of such channels and active successors.
01023  */
01024 static void
01025 prune_word_chan(ngram_search_t *ngs, int frame_idx)
01026 {
01027     root_chan_t *rhmm;
01028     chan_t *hmm, *thmm;
01029     chan_t **phmmp;             /* previous HMM-pointer */
01030     int32 nf, w, i, k;
01031     int32 newword_thresh, lastphn_thresh;
01032     int32 *awl, *nawl;
01033 
01034     nf = frame_idx + 1;
01035     newword_thresh = ngs->last_phone_best_score + ngs->wbeam;
01036     lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam;
01037 
01038     awl = ngs->active_word_list[frame_idx & 0x1];
01039     nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1];
01040 
01041     /* Dynamically allocated last channels of multi-phone words */
01042     for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0;
01043          --i, w = *(awl++)) {
01044         k = 0;
01045         phmmp = &(ngs->word_chan[w]);
01046         for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
01047             assert(hmm_frame(&hmm->hmm) >= frame_idx);
01048 
01049             thmm = hmm->next;
01050             if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) {
01051                 /* retain this channel in next frame */
01052                 hmm_frame(&hmm->hmm) = nf;
01053                 k++;
01054                 phmmp = &(hmm->next);
01055 
01056                 /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01057                 if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) {
01058                     /* can exit channel and recognize word */
01059                     ngram_search_save_bp(ngs, frame_idx, w,
01060                                  hmm_out_score(&hmm->hmm),
01061                                  hmm_out_history(&hmm->hmm),
01062                                  hmm->info.rc_id);
01063                 }
01064             }
01065             else if (hmm_frame(&hmm->hmm) == nf) {
01066                 phmmp = &(hmm->next);
01067             }
01068             else {
01069                 hmm_deinit(&hmm->hmm);
01070                 listelem_free(ngs->chan_alloc, hmm);
01071                 *phmmp = thmm;
01072             }
01073         }
01074         if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) {
01075             assert(!dict_is_single_phone(ps_search_dict(ngs), w));
01076             *(nawl++) = w;
01077             bitvec_set(ngs->word_active, w);
01078         }
01079     }
01080     ngs->n_active_word[nf & 0x1] = nawl - ngs->active_word_list[nf & 0x1];
01081 
01082     /*
01083      * Prune permanently allocated single-phone channels.
01084      * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere.
01085      */
01086     for (i = 0; i < ngs->n_1ph_words; i++) {
01087         w = ngs->single_phone_wid[i];
01088         rhmm = (root_chan_t *) ngs->word_chan[w];
01089         E_DEBUG(3,("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n",
01090                    dict_wordstr(ps_search_dict(ngs),w),
01091                    hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm),
01092                    lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh));
01093         if (hmm_frame(&rhmm->hmm) < frame_idx)
01094             continue;
01095         if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) {
01096             hmm_frame(&rhmm->hmm) = nf;
01097 
01098             /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */
01099             if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) {
01100                 E_DEBUG(4,("Exiting single phone word %s with %d > %d, %d\n",
01101                            dict_wordstr(ps_search_dict(ngs),w),
01102                            hmm_out_score(&rhmm->hmm),
01103                            lastphn_thresh, newword_thresh));
01104                 ngram_search_save_bp(ngs, frame_idx, w,
01105                              hmm_out_score(&rhmm->hmm),
01106                              hmm_out_history(&rhmm->hmm), 0);
01107             }
01108         }
01109     }
01110 }
01111 
01112 static void
01113 prune_channels(ngram_search_t *ngs, int frame_idx)
01114 {
01115     /* Clear last phone candidate list. */
01116     ngs->n_lastphn_cand = 0;
01117     /* Set the dynamic beam based on maxhmmpf here. */
01118     ngs->dynamic_beam = ngs->beam;
01119     if (ngs->maxhmmpf != -1
01120         && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) {
01121         /* Build a histogram to approximately prune them. */
01122         int32 bins[256], bw, nhmms, i;
01123         root_chan_t *rhmm;
01124         chan_t **acl, *hmm;
01125 
01126         /* Bins go from zero (best score) to edge of beam. */
01127         bw = -ngs->beam / 256;
01128         memset(bins, 0, sizeof(bins));
01129         /* For each active root channel. */
01130         for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) {
01131             int32 b;
01132 
01133             /* Put it in a bin according to its bestscore. */
01134             b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw;
01135             if (b >= 256)
01136                 b = 255;
01137             ++bins[b];
01138         }
01139         /* For each active non-root channel. */
01140         acl = ngs->active_chan_list[frame_idx & 0x1];       /* currently active HMMs in tree */
01141         for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++);
01142              i > 0; --i, hmm = *(acl++)) {
01143             int32 b;
01144 
01145             /* Put it in a bin according to its bestscore. */
01146             b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw;
01147             if (b >= 256)
01148                 b = 255;
01149             ++bins[b];
01150         }
01151         /* Walk down the bins to find the new beam. */
01152         for (i = nhmms = 0; i < 256; ++i) {
01153             nhmms += bins[i];
01154             if (nhmms > ngs->maxhmmpf)
01155                 break;
01156         }
01157         ngs->dynamic_beam = -(i * bw);
01158     }
01159 
01160     prune_root_chan(ngs, frame_idx);
01161     prune_nonroot_chan(ngs, frame_idx);
01162     last_phone_transition(ngs, frame_idx);
01163     prune_word_chan(ngs, frame_idx);
01164 }
01165 
01166 /*
01167  * Limit the number of word exits in each frame to maxwpf.  And also limit the number of filler
01168  * words to 1.
01169  */
01170 static void
01171 bptable_maxwpf(ngram_search_t *ngs, int frame_idx)
01172 {
01173     int32 bp, n;
01174     int32 bestscr, worstscr;
01175     bptbl_t *bpe, *bestbpe, *worstbpe;
01176 
01177     /* Don't prune if no pruing. */
01178     if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs))
01179         return;
01180 
01181     /* Allow only one filler word exit (the best) per frame */
01182     bestscr = (int32) 0x80000000;
01183     bestbpe = NULL;
01184     n = 0;
01185     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01186         bpe = &(ngs->bp_table[bp]);
01187         if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) {
01188             if (bpe->score BETTER_THAN bestscr) {
01189                 bestscr = bpe->score;
01190                 bestbpe = bpe;
01191             }
01192             bpe->valid = FALSE; /* Flag to indicate invalidation */
01193             n++;                /* No. of filler words */
01194         }
01195     }
01196     /* Restore bestbpe to valid state */
01197     if (bestbpe != NULL) {
01198         bestbpe->valid = TRUE;
01199         --n;
01200     }
01201 
01202     /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */
01203     n = (ngs->bpidx
01204          - ngs->bp_table_idx[frame_idx]) - n;  /* No. of entries after limiting fillers */
01205     for (; n > ngs->maxwpf; --n) {
01206         /* Find worst BPTable entry */
01207         worstscr = (int32) 0x7fffffff;
01208         worstbpe = NULL;
01209         for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) {
01210             bpe = &(ngs->bp_table[bp]);
01211             if (bpe->valid && (bpe->score WORSE_THAN worstscr)) {
01212                 worstscr = bpe->score;
01213                 worstbpe = bpe;
01214             }
01215         }
01216         /* FIXME: Don't panic! */
01217         if (worstbpe == NULL)
01218             E_FATAL("PANIC: No worst BPtable entry remaining\n");
01219         worstbpe->valid = 0;
01220     }
01221 }
01222 
01223 static void
01224 word_transition(ngram_search_t *ngs, int frame_idx)
01225 {
01226     int32 i, k, bp, w, nf;
01227     int32 rc;
01228     int32 *rcss;                /* right context score stack */
01229     int32 thresh, newscore;
01230     bptbl_t *bpe;
01231     root_chan_t *rhmm;
01232     struct bestbp_rc_s *bestbp_rc_ptr;
01233     phone_loop_search_t *pls;
01234     dict_t *dict = ps_search_dict(ngs);
01235     dict2pid_t *d2p = ps_search_dict2pid(ngs);
01236 
01237     /*
01238      * Transition to start of new word instances (HMM tree roots); but only if words
01239      * other than </s> finished here.
01240      * But, first, find the best starting score for each possible right context phone.
01241      */
01242     for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i)
01243         ngs->bestbp_rc[i].score = WORST_SCORE;
01244     k = 0;
01245     pls = (phone_loop_search_t *)ps_search_lookahead(ngs);
01246     /* Ugh, this is complicated.  Scan all word exits for this frame
01247      * (they have already been created by prune_word_chan()). */
01248     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01249         bpe = &(ngs->bp_table[bp]);
01250         ngs->word_lat_idx[bpe->wid] = NO_BP;
01251 
01252         if (bpe->wid == ps_search_finish_wid(ngs))
01253             continue;
01254         k++;
01255 
01256         /* DICT2PID */
01257         /* Array of HMM scores corresponding to all the possible right
01258          * context expansions of the final phone.  It's likely that a
01259          * lot of these are going to be missing, actually. */
01260         rcss = &(ngs->bscore_stack[bpe->s_idx]);
01261         if (bpe->last2_phone == -1) {
01262             /* No right context expansion. */
01263             for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01264                 if (rcss[0] BETTER_THAN ngs->bestbp_rc[rc].score) {
01265                     E_DEBUG(4,("bestbp_rc[0] = %d lc %d\n", rcss[0], bpe->last_phone));
01266                     ngs->bestbp_rc[rc].score = rcss[0];
01267                     ngs->bestbp_rc[rc].path = bp;
01268                     ngs->bestbp_rc[rc].lc = bpe->last_phone;
01269                 }
01270             }
01271         }
01272         else {
01273             xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone);
01274             for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) {
01275                 if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) {
01276                     E_DEBUG(4,("bestbp_rc[%d] = %d lc %d\n",
01277                                rc, rcss[rssid->cimap[rc]], bpe->last_phone));
01278                     ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]];
01279                     ngs->bestbp_rc[rc].path = bp;
01280                     ngs->bestbp_rc[rc].lc = bpe->last_phone;
01281                 }
01282             }
01283         }
01284     }
01285     if (k == 0)
01286         return;
01287 
01288     nf = frame_idx + 1;
01289     thresh = ngs->best_score + ngs->dynamic_beam;
01290     /*
01291      * Hypothesize successors to words finished in this frame.
01292      * Main dictionary, multi-phone words transition to HMM-trees roots.
01293      */
01294     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01295         bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]);
01296 
01297         newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip
01298             + phone_loop_search_score(pls, rhmm->ciphone);
01299         if (newscore BETTER_THAN thresh) {
01300             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01301                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01302                 hmm_enter(&rhmm->hmm, newscore,
01303                           bestbp_rc_ptr->path, nf);
01304                 /* DICT2PID: Another place where mpx ssids are entered. */
01305                 /* Look up the ssid to use when entering this mpx triphone. */
01306                 hmm_mpx_ssid(&rhmm->hmm, 0) =
01307                     dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc);
01308                 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01309             }
01310         }
01311     }
01312 
01313     /*
01314      * Single phone words; no right context for these.  Cannot use bestbp_rc as
01315      * LM scores have to be included.  First find best transition to these words.
01316      */
01317     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01318         w = ngs->single_phone_wid[i];
01319         ngs->last_ltrans[w].dscr = (int32) 0x80000000;
01320     }
01321     for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) {
01322         bpe = &(ngs->bp_table[bp]);
01323         if (!bpe->valid)
01324             continue;
01325 
01326         for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01327             int32 n_used;
01328             w = ngs->single_phone_wid[i];
01329             newscore = ngram_search_exit_score
01330                 (ngs, bpe, dict_first_phone(dict, w));
01331             E_DEBUG(4, ("initial newscore for %s: %d\n",
01332                         dict_wordstr(dict, w), newscore));
01333             newscore += ngram_tg_score(ngs->lmset,
01334                                        dict_basewid(dict, w),
01335                                        bpe->real_wid,
01336                                        bpe->prev_real_wid, &n_used);
01337 
01338             if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) {
01339                 ngs->last_ltrans[w].dscr = newscore;
01340                 ngs->last_ltrans[w].bp = bp;
01341             }
01342         }
01343     }
01344 
01345     /* Now transition to in-LM single phone words */
01346     for (i = 0; i < ngs->n_1ph_LMwords; i++) {
01347         w = ngs->single_phone_wid[i];
01348         rhmm = (root_chan_t *) ngs->word_chan[w];
01349         newscore = ngs->last_ltrans[w].dscr + ngs->pip
01350             + phone_loop_search_score(pls, rhmm->ciphone);
01351         if (newscore BETTER_THAN thresh) {
01352             bpe = ngs->bp_table + ngs->last_ltrans[w].bp;
01353             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01354                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01355                 hmm_enter(&rhmm->hmm,
01356                           newscore, ngs->last_ltrans[w].bp, nf);
01357                 /* DICT2PID: another place where mpx ssids are entered. */
01358                 /* Look up the ssid to use when entering this mpx triphone. */
01359                 hmm_mpx_ssid(&rhmm->hmm, 0) =
01360                     dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
01361                                       dict_last_phone(dict, bpe->wid));
01362                 assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID);
01363             }
01364         }
01365     }
01366 
01367     /* Remaining words: <sil>, noise words.  No mpx for these! */
01368     w = ps_search_silence_wid(ngs);
01369     rhmm = (root_chan_t *) ngs->word_chan[w];
01370     bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]);
01371     newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip
01372         + phone_loop_search_score(pls, rhmm->ciphone);
01373     if (newscore BETTER_THAN thresh) {
01374         if ((hmm_frame(&rhmm->hmm) < frame_idx)
01375             || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01376             hmm_enter(&rhmm->hmm,
01377                       newscore, bestbp_rc_ptr->path, nf);
01378         }
01379     }
01380     for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) {
01381         if (w == ps_search_silence_wid(ngs))
01382             continue;
01383         rhmm = (root_chan_t *) ngs->word_chan[w];
01384         /* If this was not actually a single-phone word, rhmm will be NULL. */
01385         if (rhmm == NULL)
01386             continue;
01387         newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip
01388             + phone_loop_search_score(pls, rhmm->ciphone);
01389         if (newscore BETTER_THAN thresh) {
01390             if ((hmm_frame(&rhmm->hmm) < frame_idx)
01391                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
01392                 hmm_enter(&rhmm->hmm,
01393                           newscore, bestbp_rc_ptr->path, nf);
01394             }
01395         }
01396     }
01397 }
01398 
01399 static void
01400 deactivate_channels(ngram_search_t *ngs, int frame_idx)
01401 {
01402     root_chan_t *rhmm;
01403     int i;
01404 
01405     /* Clear score[] of pruned root channels */
01406     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01407         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01408             hmm_clear_scores(&rhmm->hmm);
01409         }
01410     }
01411     /* Clear score[] of pruned single-phone channels */
01412     for (i = 0; i < ngs->n_1ph_words; i++) {
01413         int32 w = ngs->single_phone_wid[i];
01414         rhmm = (root_chan_t *) ngs->word_chan[w];
01415         if (hmm_frame(&rhmm->hmm) == frame_idx) {
01416             hmm_clear_scores(&rhmm->hmm);
01417         }
01418     }
01419 }
01420 
01421 int
01422 ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
01423 {
01424     int16 const *senscr;
01425 
01426     /* Activate our HMMs for the current frame if need be. */
01427     if (!ps_search_acmod(ngs)->compallsen)
01428         compute_sen_active(ngs, frame_idx);
01429 
01430     /* Compute GMM scores for the current frame. */
01431     if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL)
01432         return 0;
01433     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
01434 
01435     /* Mark backpointer table for current frame. */
01436     ngram_search_mark_bptable(ngs, frame_idx);
01437 
01438     /* If the best score is equal to or worse than WORST_SCORE,
01439      * recognition has failed, don't bother to keep trying. */
01440     if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
01441         return 0;
01442     /* Renormalize if necessary */
01443     if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
01444         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
01445                frame_idx, ngs->best_score);
01446         renormalize_scores(ngs, frame_idx, ngs->best_score);
01447     }
01448 
01449     /* Evaluate HMMs */
01450     evaluate_channels(ngs, senscr, frame_idx);
01451     /* Prune HMMs and do phone transitions. */
01452     prune_channels(ngs, frame_idx);
01453     /* Do absolute pruning on word exits. */
01454     bptable_maxwpf(ngs, frame_idx);
01455     /* Do word transitions. */
01456     word_transition(ngs, frame_idx);
01457     /* Deactivate pruned HMMs. */
01458     deactivate_channels(ngs, frame_idx);
01459 
01460     ++ngs->n_frame;
01461     /* Return the number of frames processed. */
01462     return 1;
01463 }
01464 
01465 void
01466 ngram_fwdtree_finish(ngram_search_t *ngs)
01467 {
01468     int32 i, w, cf, *awl;
01469     root_chan_t *rhmm;
01470     chan_t *hmm, **acl;
01471 
01472     /* This is the number of frames processed. */
01473     cf = ps_search_acmod(ngs)->output_frame;
01474     /* Add a mark in the backpointer table for one past the final frame. */
01475     ngram_search_mark_bptable(ngs, cf);
01476 
01477     /* Deactivate channels lined up for the next frame */
01478     /* First, root channels of HMM tree */
01479     for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) {
01480         hmm_clear(&rhmm->hmm);
01481     }
01482 
01483     /* nonroot channels of HMM tree */
01484     i = ngs->n_active_chan[cf & 0x1];
01485     acl = ngs->active_chan_list[cf & 0x1];
01486     for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) {
01487         hmm_clear(&hmm->hmm);
01488     }
01489 
01490     /* word channels */
01491     i = ngs->n_active_word[cf & 0x1];
01492     awl = ngs->active_word_list[cf & 0x1];
01493     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
01494         /* Don't accidentally free single-phone words! */
01495         if (dict_is_single_phone(ps_search_dict(ngs), w))
01496             continue;
01497         bitvec_clear(ngs->word_active, w);
01498         if (ngs->word_chan[w] == NULL)
01499             continue;
01500         ngram_search_free_all_rc(ngs, w);
01501     }
01502 
01503     /*
01504      * The previous search code did a postprocessing of the
01505      * backpointer table here, but we will postpone this until it is
01506      * absolutely necessary, i.e. when generating a word graph.
01507      * Likewise we don't actually have to decide what the exit word is
01508      * until somebody requests a backtrace.
01509      */
01510 
01511     /* Print out some statistics. */
01512     if (cf > 0) {
01513         E_INFO("%8d words recognized (%d/fr)\n",
01514                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
01515         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
01516                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
01517         E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n",
01518                ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval,
01519                (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1),
01520                ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval);
01521         E_INFO("%8d words for which last channels evaluated (%d/fr)\n",
01522                ngs->st.n_word_lastchan_eval,
01523                ngs->st.n_word_lastchan_eval / (cf + 1));
01524         E_INFO("%8d candidate words for entering last phone (%d/fr)\n",
01525                ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1));
01526     }
01527 }

Generated on Sat Jan 8 2011 for PocketSphinx by  doxygen 1.7.1