• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/ngram_search_fwdflat.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00042 /* System headers. */
00043 #include <string.h>
00044 #include <assert.h>
00045 
00046 /* SphinxBase headers. */
00047 #include <ckd_alloc.h>
00048 #include <listelem_alloc.h>
00049 #include <err.h>
00050 
00051 /* Local headers. */
00052 #include "ngram_search.h"
00053 #include "ps_lattice_internal.h"
00054 
/* Channel evaluation hook.  Change __CHAN_DUMP__ to 1 to make every
 * chan_v_eval() go through hmm_dump_vit_eval() with stderr as the
 * output stream; with 0 it expands to plain hmm_vit_eval(). */
#define __CHAN_DUMP__           0
#if !__CHAN_DUMP__
#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm)
#else
#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr)
#endif
00062 
00063 static void
00064 ngram_fwdflat_expand_all(ngram_search_t *ngs)
00065 {
00066     int n_words, i;
00067 
00068     /* For all "real words" (not fillers or <s>/</s>) in the dictionary,
00069      *
00070      * 1) Add the ones which are in the LM to the fwdflat wordlist
00071      * 2) And to the expansion list (since we are expanding all)
00072      */
00073     ngs->n_expand_words = 0;
00074     n_words = ps_search_n_words(ngs);
00075     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00076     for (i = 0; i < n_words; ++i) {
00077         if (!dict_real_word(ps_search_dict(ngs), i))
00078             continue;
00079         if (!ngram_model_set_known_wid(ngs->lmset,
00080                                        dict_basewid(ps_search_dict(ngs),i)))
00081             continue;
00082         ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
00083         ngs->expand_word_list[ngs->n_expand_words] = i;
00084         bitvec_set(ngs->expand_word_flag, i);
00085         ngs->n_expand_words++;
00086     }
00087     E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);
00088     ngs->expand_word_list[ngs->n_expand_words] = -1;
00089     ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
00090 }
00091 
00092 static void
00093 ngram_fwdflat_allocate_1ph(ngram_search_t *ngs)
00094 {
00095     dict_t *dict = ps_search_dict(ngs);
00096     int n_words = ps_search_n_words(ngs);
00097     int i, w;
00098 
00099     /* Allocate single-phone words, since they won't have
00100      * been allocated for us by fwdtree initialization. */
00101     ngs->n_1ph_words = 0;
00102     for (w = 0; w < n_words; w++) {
00103         if (dict_is_single_phone(dict, w))
00104             ++ngs->n_1ph_words;
00105     }
00106     ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph));
00107     i = 0;
00108     for (w = 0; w < n_words; w++) {
00109         if (!dict_is_single_phone(dict, w))
00110             continue;
00111 
00112         /* DICT2PID location */
00113         ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w);
00114         ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef);
00115         hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE,
00116                  /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef,
00117                                               ngs->rhmm_1ph[i].ciphone),
00118                  /* tmatid */ ngs->rhmm_1ph[i].ciphone);
00119         ngs->rhmm_1ph[i].next = NULL;
00120         ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]);
00121         i++;
00122     }
00123 }
00124 
00125 static void
00126 ngram_fwdflat_free_1ph(ngram_search_t *ngs)
00127 {
00128     int i, w;
00129     int n_words = ps_search_n_words(ngs);
00130 
00131     for (i = w = 0; w < n_words; ++w) {
00132         if (!dict_is_single_phone(ps_search_dict(ngs), w))
00133             continue;
00134         hmm_deinit(&ngs->rhmm_1ph[i].hmm);
00135         ++i;
00136     }
00137     ckd_free(ngs->rhmm_1ph);
00138     ngs->rhmm_1ph = NULL;
00139 }
00140 
00141 void
00142 ngram_fwdflat_init(ngram_search_t *ngs)
00143 {
00144     int n_words;
00145 
00146     n_words = ps_search_n_words(ngs);
00147     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00148     ngs->expand_word_flag = bitvec_alloc(n_words);
00149     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00150     ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist));
00151     ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid");
00152     ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin");
00153     E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n",
00154            ngs->min_ef_width, ngs->max_sf_win);
00155 
00156     /* No tree-search; pre-build the expansion list, including all LM words. */
00157     if (!ngs->fwdtree) {
00158         /* Build full expansion list from LM words. */
00159         ngram_fwdflat_expand_all(ngs);
00160         /* Allocate single phone words. */
00161         ngram_fwdflat_allocate_1ph(ngs);
00162     }
00163 }
00164 
00165 void
00166 ngram_fwdflat_deinit(ngram_search_t *ngs)
00167 {
00168     /* Free single-phone words if we allocated them. */
00169     if (!ngs->fwdtree) {
00170         ngram_fwdflat_free_1ph(ngs);
00171     }
00172     ckd_free(ngs->fwdflat_wordlist);
00173     bitvec_free(ngs->expand_word_flag);
00174     ckd_free(ngs->expand_word_list);
00175     ckd_free(ngs->frm_wordlist);
00176 }
00177 
00178 int
00179 ngram_fwdflat_reinit(ngram_search_t *ngs)
00180 {
00181     /* Reallocate things that depend on the number of words. */
00182     int n_words;
00183 
00184     ckd_free(ngs->fwdflat_wordlist);
00185     ckd_free(ngs->expand_word_list);
00186     bitvec_free(ngs->expand_word_flag);
00187     n_words = ps_search_n_words(ngs);
00188     ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist));
00189     ngs->expand_word_flag = bitvec_alloc(n_words);
00190     ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list));
00191     
00192     /* No tree-search; take care of the expansion list and single phone words. */
00193     if (!ngs->fwdtree) {
00194         /* Free single-phone words. */
00195         ngram_fwdflat_free_1ph(ngs);
00196         /* Reallocate word_chan. */
00197         ckd_free(ngs->word_chan);
00198         ngs->word_chan = ckd_calloc(dict_size(ps_search_dict(ngs)),
00199                                     sizeof(*ngs->word_chan));
00200         /* Rebuild full expansion list from LM words. */
00201         ngram_fwdflat_expand_all(ngs);
00202         /* Allocate single phone words. */
00203         ngram_fwdflat_allocate_1ph(ngs);
00204     }
00205     /* Otherwise there is nothing to do since the wordlist is
00206      * generated anew every utterance. */
00207     return 0;
00208 }
00209 
00213 static void
00214 build_fwdflat_wordlist(ngram_search_t *ngs)
00215 {
00216     int32 i, f, sf, ef, wid, nwd;
00217     dict_t *dict;
00218     bptbl_t *bp;
00219     ps_latnode_t *node, *prevnode, *nextnode;
00220 
00221     /* No tree-search, use statically allocated wordlist. */
00222     if (!ngs->fwdtree)
00223         return;
00224 
00225     dict = ps_search_dict(ngs);
00226 
00227     memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));
00228 
00229     /* Scan the backpointer table for all active words and record
00230      * their exit frames. */
00231     for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
00232         sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
00233         ef = bp->frame;
00234         wid = bp->wid;
00235 
00236         /*
00237          * NOTE: fwdflat_wordlist excludes <s>, <sil> and noise words;
00238          * it includes </s>.  That is, it includes anything to which a
00239          * transition can be made in the LM.
00240          */
00241         /* Ignore silence and <s> */
00242         if (dict_filler_word(dict, wid) || (wid == dict_startwid(dict)))
00243             continue;
00244 
00245         /* Look for it in the wordlist. */
00246         for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
00247              node = node->next);
00248 
00249         /* Update last end frame. */
00250         if (node)
00251             node->lef = ef;
00252         else {
00253             /* New node; link to head of list */
00254             node = listelem_malloc(ngs->latnode_alloc);
00255             node->wid = wid;
00256             node->fef = node->lef = ef;
00257 
00258             node->next = ngs->frm_wordlist[sf];
00259             ngs->frm_wordlist[sf] = node;
00260         }
00261     }
00262 
00263     /* Eliminate "unlikely" words, for which there are too few end points */
00264     for (f = 0; f < ngs->n_frame; f++) {
00265         prevnode = NULL;
00266         for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
00267             nextnode = node->next;
00268             /* Word has too few endpoints */
00269             if ((node->lef - node->fef < ngs->min_ef_width) ||
00270                 /* Word is </s> and doesn't actually end in last frame */
00271                 ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
00272                 if (!prevnode)
00273                     ngs->frm_wordlist[f] = nextnode;
00274                 else
00275                     prevnode->next = nextnode;
00276                 listelem_free(ngs->latnode_alloc, node);
00277             }
00278             else
00279                 prevnode = node;
00280         }
00281     }
00282 
00283     /* Form overall wordlist for 2nd pass */
00284     nwd = 0;
00285     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00286     for (f = 0; f < ngs->n_frame; f++) {
00287         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00288             if (!bitvec_is_set(ngs->word_active, node->wid)) {
00289                 bitvec_set(ngs->word_active, node->wid);
00290                 ngs->fwdflat_wordlist[nwd++] = node->wid;
00291             }
00292         }
00293     }
00294     ngs->fwdflat_wordlist[nwd] = -1;
00295     E_INFO("Utterance vocabulary contains %d words\n", nwd);
00296 }
00297 
00301 static void
00302 build_fwdflat_chan(ngram_search_t *ngs)
00303 {
00304     int32 i, wid, p;
00305     root_chan_t *rhmm;
00306     chan_t *hmm, *prevhmm;
00307     dict_t *dict;
00308     dict2pid_t *d2p;
00309 
00310     dict = ps_search_dict(ngs);
00311     d2p = ps_search_dict2pid(ngs);
00312 
00313     /* Build word HMMs for each word in the lattice. */
00314     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00315         wid = ngs->fwdflat_wordlist[i];
00316 
00317         /* Omit single-phone words as they are permanently allocated */
00318         if (dict_is_single_phone(dict, wid))
00319             continue;
00320 
00321         assert(ngs->word_chan[wid] == NULL);
00322 
00323         /* Multiplex root HMM for first phone (one root per word, flat
00324          * lexicon).  diphone is irrelevant here, for the time being,
00325          * at least. */
00326         rhmm = listelem_malloc(ngs->root_chan_alloc);
00327         rhmm->ci2phone = dict_second_phone(dict, wid);
00328         rhmm->ciphone = dict_first_phone(dict, wid);
00329         rhmm->next = NULL;
00330         hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE,
00331                  bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone),
00332                  rhmm->ciphone);
00333 
00334         /* HMMs for word-internal phones */
00335         prevhmm = NULL;
00336         for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) {
00337             hmm = listelem_malloc(ngs->chan_alloc);
00338             hmm->ciphone = dict_pron(dict, wid, p);
00339             hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1;
00340             hmm->next = NULL;
00341             hmm_init(ngs->hmmctx, &hmm->hmm, FALSE,
00342                      dict2pid_internal(d2p,wid,p), hmm->ciphone);
00343 
00344             if (prevhmm)
00345                 prevhmm->next = hmm;
00346             else
00347                 rhmm->next = hmm;
00348 
00349             prevhmm = hmm;
00350         }
00351 
00352         /* Right-context phones */
00353         ngram_search_alloc_all_rc(ngs, wid);
00354 
00355         /* Link in just allocated right-context phones */
00356         if (prevhmm)
00357             prevhmm->next = ngs->word_chan[wid];
00358         else
00359             rhmm->next = ngs->word_chan[wid];
00360         ngs->word_chan[wid] = (chan_t *) rhmm;
00361     }
00362 }
00363 
00364 void
00365 ngram_fwdflat_start(ngram_search_t *ngs)
00366 {
00367     root_chan_t *rhmm;
00368     int i;
00369 
00370     build_fwdflat_wordlist(ngs);
00371     build_fwdflat_chan(ngs);
00372 
00373     ngs->bpidx = 0;
00374     ngs->bss_head = 0;
00375 
00376     for (i = 0; i < ps_search_n_words(ngs); i++)
00377         ngs->word_lat_idx[i] = NO_BP;
00378 
00379     /* Start search with <s>; word_chan[<s>] is permanently allocated */
00380     rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)];
00381     hmm_enter(&rhmm->hmm, 0, NO_BP, 0);
00382     ngs->active_word_list[0][0] = ps_search_start_wid(ngs);
00383     ngs->n_active_word[0] = 1;
00384 
00385     ngs->best_score = 0;
00386     ngs->renormalized = FALSE;
00387 
00388     for (i = 0; i < ps_search_n_words(ngs); i++)
00389         ngs->last_ltrans[i].sf = -1;
00390 
00391     if (!ngs->fwdtree)
00392         ngs->n_frame = 0;
00393 
00394     ngs->st.n_fwdflat_chan = 0;
00395     ngs->st.n_fwdflat_words = 0;
00396     ngs->st.n_fwdflat_word_transition = 0;
00397     ngs->st.n_senone_active_utt = 0;
00398 }
00399 
00400 static void
00401 compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx)
00402 {
00403     int32 i, w;
00404     int32 *awl;
00405     root_chan_t *rhmm;
00406     chan_t *hmm;
00407 
00408     acmod_clear_active(ps_search_acmod(ngs));
00409 
00410     i = ngs->n_active_word[frame_idx & 0x1];
00411     awl = ngs->active_word_list[frame_idx & 0x1];
00412 
00413     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00414         rhmm = (root_chan_t *)ngs->word_chan[w];
00415         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00416             acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm);
00417         }
00418 
00419         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00420             if (hmm_frame(&hmm->hmm) == frame_idx) {
00421                 acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm);
00422             }
00423         }
00424     }
00425 }
00426 
00427 static void
00428 fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx)
00429 {
00430     int32 i, w, bestscore;
00431     int32 *awl;
00432     root_chan_t *rhmm;
00433     chan_t *hmm;
00434 
00435     i = ngs->n_active_word[frame_idx & 0x1];
00436     awl = ngs->active_word_list[frame_idx & 0x1];
00437     bestscore = WORST_SCORE;
00438 
00439     ngs->st.n_fwdflat_words += i;
00440 
00441     /* Scan all active words. */
00442     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00443         rhmm = (root_chan_t *) ngs->word_chan[w];
00444         if (hmm_frame(&rhmm->hmm) == frame_idx) {
00445             int32 score = chan_v_eval(rhmm);
00446             if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs)))
00447                 bestscore = score;
00448             ngs->st.n_fwdflat_chan++;
00449         }
00450 
00451         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00452             if (hmm_frame(&hmm->hmm) == frame_idx) {
00453                 int32 score = chan_v_eval(hmm);
00454                 if (score BETTER_THAN bestscore)
00455                     bestscore = score;
00456                 ngs->st.n_fwdflat_chan++;
00457             }
00458         }
00459     }
00460 
00461     ngs->best_score = bestscore;
00462 }
00463 
00464 static void
00465 fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
00466 {
00467     int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
00468     int32 *awl;
00469     root_chan_t *rhmm;
00470     chan_t *hmm, *nexthmm;
00471 
00472     cf = frame_idx;
00473     nf = cf + 1;
00474     i = ngs->n_active_word[cf & 0x1];
00475     awl = ngs->active_word_list[cf & 0x1];
00476     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00477 
00478     thresh = ngs->best_score + ngs->fwdflatbeam;
00479     wordthresh = ngs->best_score + ngs->fwdflatwbeam;
00480     pip = ngs->pip;
00481     E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));
00482 
00483     /* Scan all active words. */
00484     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00485         rhmm = (root_chan_t *) ngs->word_chan[w];
00486         /* Propagate active root channels */
00487         if (hmm_frame(&rhmm->hmm) == cf
00488             && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
00489             hmm_frame(&rhmm->hmm) = nf;
00490             bitvec_set(ngs->word_active, w);
00491 
00492             /* Transitions out of root channel */
00493             newscore = hmm_out_score(&rhmm->hmm);
00494             if (rhmm->next) {
00495                 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
00496 
00497                 newscore += pip;
00498                 if (newscore BETTER_THAN thresh) {
00499                     hmm = rhmm->next;
00500                     /* Enter all right context phones */
00501                     if (hmm->info.rc_id >= 0) {
00502                         for (; hmm; hmm = hmm->next) {
00503                             if ((hmm_frame(&hmm->hmm) < cf)
00504                                 || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00505                                 hmm_enter(&hmm->hmm, newscore,
00506                                           hmm_out_history(&rhmm->hmm), nf);
00507                             }
00508                         }
00509                     }
00510                     /* Just a normal word internal phone */
00511                     else {
00512                         if ((hmm_frame(&hmm->hmm) < cf)
00513                             || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
00514                                 hmm_enter(&hmm->hmm, newscore,
00515                                           hmm_out_history(&rhmm->hmm), nf);
00516                         }
00517                     }
00518                 }
00519             }
00520             else {
00521                 assert(dict_is_single_phone(ps_search_dict(ngs), w));
00522 
00523                 /* Word exit for single-phone words (where did their
00524                  * whmms come from?) */
00525                 if (newscore BETTER_THAN wordthresh) {
00526                     ngram_search_save_bp(ngs, cf, w, newscore,
00527                                          hmm_out_history(&rhmm->hmm), 0);
00528                 }
00529             }
00530         }
00531 
00532         /* Transitions out of non-root channels. */
00533         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00534             if (hmm_frame(&hmm->hmm) >= cf) {
00535                 /* Propagate forward HMMs inside the beam. */
00536                 if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
00537                     hmm_frame(&hmm->hmm) = nf;
00538                     bitvec_set(ngs->word_active, w);
00539 
00540                     newscore = hmm_out_score(&hmm->hmm);
00541                     /* Word-internal phones */
00542                     if (hmm->info.rc_id < 0) {
00543                         newscore += pip;
00544                         if (newscore BETTER_THAN thresh) {
00545                             nexthmm = hmm->next;
00546                             /* Enter all right-context phones. */
00547                             if (nexthmm->info.rc_id >= 0) {
00548                                  for (; nexthmm; nexthmm = nexthmm->next) {
00549                                     if ((hmm_frame(&nexthmm->hmm) < cf)
00550                                         || (newscore BETTER_THAN
00551                                             hmm_in_score(&nexthmm->hmm))) {
00552                                         hmm_enter(&nexthmm->hmm,
00553                                                   newscore,
00554                                                   hmm_out_history(&hmm->hmm),
00555                                                   nf);
00556                                     }
00557                                 }
00558                             }
00559                             /* Enter single word-internal phone. */
00560                             else {
00561                                 if ((hmm_frame(&nexthmm->hmm) < cf)
00562                                     || (newscore BETTER_THAN
00563                                         hmm_in_score(&nexthmm->hmm))) {
00564                                     hmm_enter(&nexthmm->hmm, newscore,
00565                                               hmm_out_history(&hmm->hmm), nf);
00566                                 }
00567                             }
00568                         }
00569                     }
00570                     /* Right-context phones - apply word beam and exit. */
00571                     else {
00572                         if (newscore BETTER_THAN wordthresh) {
00573                             ngram_search_save_bp(ngs, cf, w, newscore,
00574                                                  hmm_out_history(&hmm->hmm),
00575                                                  hmm->info.rc_id);
00576                         }
00577                     }
00578                 }
00579                 /* Zero out inactive HMMs. */
00580                 else if (hmm_frame(&hmm->hmm) != nf) {
00581                     hmm_clear_scores(&hmm->hmm);
00582                 }
00583             }
00584         }
00585     }
00586 }
00587 
00588 static void
00589 get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
00590 {
00591     int32 f, sf, ef;
00592     ps_latnode_t *node;
00593 
00594     if (!ngs->fwdtree) {
00595         ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00596         return;
00597     }
00598 
00599     sf = frm - win;
00600     if (sf < 0)
00601         sf = 0;
00602     ef = frm + win;
00603     if (ef > ngs->n_frame)
00604         ef = ngs->n_frame;
00605 
00606     bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
00607     ngs->n_expand_words = 0;
00608 
00609     for (f = sf; f < ef; f++) {
00610         for (node = ngs->frm_wordlist[f]; node; node = node->next) {
00611             if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
00612                 ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
00613                 bitvec_set(ngs->expand_word_flag, node->wid);
00614             }
00615         }
00616     }
00617     ngs->expand_word_list[ngs->n_expand_words] = -1;
00618     ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
00619 }
00620 
00621 static void
00622 fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
00623 {
00624     int32 cf, nf, b, thresh, pip, i, w, newscore;
00625     int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
00626     bptbl_t *bp;
00627     int32 *rcss;
00628     root_chan_t *rhmm;
00629     int32 *awl;
00630     float32 lwf;
00631     dict_t *dict = ps_search_dict(ngs);
00632     dict2pid_t *d2p = ps_search_dict2pid(ngs);
00633 
00634     cf = frame_idx;
00635     nf = cf + 1;
00636     thresh = ngs->best_score + ngs->fwdflatbeam;
00637     pip = ngs->pip;
00638     best_silrc_score = WORST_SCORE;
00639     lwf = ngs->fwdflat_fwdtree_lw_ratio;
00640 
00641     /* Search for all words starting within a window of this frame.
00642      * These are the successors for words exiting now. */
00643     get_expand_wordlist(ngs, cf, ngs->max_sf_win);
00644 
00645     /* Scan words exited in current frame */
00646     for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
00647         xwdssid_t *rssid;
00648         int32 silscore;
00649 
00650         bp = ngs->bp_table + b;
00651         ngs->word_lat_idx[bp->wid] = NO_BP;
00652 
00653         if (bp->wid == ps_search_finish_wid(ngs))
00654             continue;
00655 
00656         /* DICT2PID location */
00657         /* Get the mapping from right context phone ID to index in the
00658          * right context table and the bscore_stack. */
00659         rcss = ngs->bscore_stack + bp->s_idx;
00660         if (bp->last2_phone == -1)
00661             rssid = NULL;
00662         else
00663             rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);
00664 
00665         /* Transition to all successor words. */
00666         for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
00667             int32 n_used;
00668 
00669             w = ngs->expand_word_list[i];
00670 
00671             /* Get the exit score we recorded in save_bwd_ptr(), or
00672              * something approximating it. */
00673             if (rssid)
00674                 newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
00675             else
00676                 newscore = rcss[0];
00677             if (newscore == WORST_SCORE)
00678                 continue;
00679             /* FIXME: Floating point... */
00680             newscore += lwf
00681                 * ngram_tg_score(ngs->lmset,
00682                                  dict_basewid(dict, w),
00683                                  bp->real_wid,
00684                                  bp->prev_real_wid, &n_used);
00685             newscore += pip;
00686 
00687             /* Enter the next word */
00688             if (newscore BETTER_THAN thresh) {
00689                 rhmm = (root_chan_t *) ngs->word_chan[w];
00690                 if ((hmm_frame(&rhmm->hmm) < cf)
00691                     || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00692                     hmm_enter(&rhmm->hmm, newscore, b, nf);
00693                     /* DICT2PID: This is where mpx ssids get introduced. */
00694                     /* Look up the ssid to use when entering this mpx triphone. */
00695                     hmm_mpx_ssid(&rhmm->hmm, 0) =
00696                         dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
00697                                           dict_last_phone(dict, bp->wid));
00698                     assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
00699                     E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
00700                                rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
00701                                hmm_mpx_ssid(&rhmm->hmm, 0)));
00702                     bitvec_set(ngs->word_active, w);
00703                 }
00704             }
00705         }
00706 
00707         /* Get the best exit into silence. */
00708         if (rssid)
00709             silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
00710         else
00711             silscore = rcss[0];
00712         if (silscore BETTER_THAN best_silrc_score) {
00713             best_silrc_score = silscore;
00714             best_silrc_bp = b;
00715         }
00716     }
00717 
00718     /* Transition to <sil> */
00719     newscore = best_silrc_score + ngs->silpen + pip;
00720     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00721         w = ps_search_silence_wid(ngs);
00722         rhmm = (root_chan_t *) ngs->word_chan[w];
00723         if ((hmm_frame(&rhmm->hmm) < cf)
00724             || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00725             hmm_enter(&rhmm->hmm, newscore,
00726                       best_silrc_bp, nf);
00727             bitvec_set(ngs->word_active, w);
00728         }
00729     }
00730     /* Transition to noise words */
00731     newscore = best_silrc_score + ngs->fillpen + pip;
00732     if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
00733         for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
00734             rhmm = (root_chan_t *) ngs->word_chan[w];
00735             /* Noise words that aren't a single phone will have NULL here. */
00736             if (rhmm == NULL)
00737                 continue;
00738             if ((hmm_frame(&rhmm->hmm) < cf)
00739                 || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
00740                 hmm_enter(&rhmm->hmm, newscore,
00741                           best_silrc_bp, nf);
00742                 bitvec_set(ngs->word_active, w);
00743             }
00744         }
00745     }
00746 
00747     /* Reset initial channels of words that have become inactive even after word trans. */
00748     i = ngs->n_active_word[cf & 0x1];
00749     awl = ngs->active_word_list[cf & 0x1];
00750     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00751         rhmm = (root_chan_t *) ngs->word_chan[w];
00752         if (hmm_frame(&rhmm->hmm) == cf) {
00753             hmm_clear_scores(&rhmm->hmm);
00754         }
00755     }
00756 }
00757 
00758 static void
00759 fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm)
00760 {
00761     root_chan_t *rhmm;
00762     chan_t *hmm;
00763     int32 i, cf, w, *awl;
00764 
00765     cf = frame_idx;
00766 
00767     /* Renormalize individual word channels */
00768     i = ngs->n_active_word[cf & 0x1];
00769     awl = ngs->active_word_list[cf & 0x1];
00770     for (w = *(awl++); i > 0; --i, w = *(awl++)) {
00771         rhmm = (root_chan_t *) ngs->word_chan[w];
00772         if (hmm_frame(&rhmm->hmm) == cf) {
00773             hmm_normalize(&rhmm->hmm, norm);
00774         }
00775         for (hmm = rhmm->next; hmm; hmm = hmm->next) {
00776             if (hmm_frame(&hmm->hmm) == cf) {
00777                 hmm_normalize(&hmm->hmm, norm);
00778             }
00779         }
00780     }
00781 
00782     ngs->renormalized = TRUE;
00783 }
00784 
00785 int
00786 ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
00787 {
00788     int16 const *senscr;
00789     int32 nf, i, j;
00790     int32 *nawl;
00791 
00792     /* Activate our HMMs for the current frame if need be. */
00793     if (!ps_search_acmod(ngs)->compallsen)
00794         compute_fwdflat_sen_active(ngs, frame_idx);
00795 
00796     /* Compute GMM scores for the current frame. */
00797     senscr = acmod_score(ps_search_acmod(ngs), &frame_idx);
00798     ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active;
00799 
00800     /* Mark backpointer table for current frame. */
00801     ngram_search_mark_bptable(ngs, frame_idx);
00802 
00803     /* If the best score is equal to or worse than WORST_SCORE,
00804      * recognition has failed, don't bother to keep trying. */
00805     if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
00806         return 0;
00807     /* Renormalize if necessary */
00808     if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) {
00809         E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00810                frame_idx, ngs->best_score);
00811         fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score);
00812     }
00813 
00814     ngs->best_score = WORST_SCORE;
00815     hmm_context_set_senscore(ngs->hmmctx, senscr);
00816 
00817     /* Evaluate HMMs */
00818     fwdflat_eval_chan(ngs, frame_idx);
00819     /* Prune HMMs and do phone transitions. */
00820     fwdflat_prune_chan(ngs, frame_idx);
00821     /* Do word transitions. */
00822     fwdflat_word_transition(ngs, frame_idx);
00823 
00824     /* Create next active word list */
00825     nf = frame_idx + 1;
00826     nawl = ngs->active_word_list[nf & 0x1];
00827     for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00828         if (bitvec_is_set(ngs->word_active, ngs->fwdflat_wordlist[i])) {
00829             *(nawl++) = ngs->fwdflat_wordlist[i];
00830             j++;
00831         }
00832     }
00833     for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) {
00834         if (bitvec_is_set(ngs->word_active, i)) {
00835             *(nawl++) = i;
00836             j++;
00837         }
00838     }
00839     if (!ngs->fwdtree)
00840         ++ngs->n_frame;
00841     ngs->n_active_word[nf & 0x1] = j;
00842 
00843     /* Return the number of frames processed. */
00844     return 1;
00845 }
00846 
00850 static void
00851 destroy_fwdflat_wordlist(ngram_search_t *ngs)
00852 {
00853     ps_latnode_t *node, *tnode;
00854     int32 f;
00855 
00856     if (!ngs->fwdtree)
00857         return;
00858 
00859     for (f = 0; f < ngs->n_frame; f++) {
00860         for (node = ngs->frm_wordlist[f]; node; node = tnode) {
00861             tnode = node->next;
00862             listelem_free(ngs->latnode_alloc, node);
00863         }
00864     }
00865 }
00866 
00870 static void
00871 destroy_fwdflat_chan(ngram_search_t *ngs)
00872 {
00873     int32 i, wid;
00874 
00875     for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) {
00876         root_chan_t *rhmm;
00877         chan_t *thmm;
00878         wid = ngs->fwdflat_wordlist[i];
00879         if (dict_is_single_phone(ps_search_dict(ngs),wid))
00880             continue;
00881         assert(ngs->word_chan[wid] != NULL);
00882 
00883         /* The first HMM in ngs->word_chan[wid] was allocated with
00884          * ngs->root_chan_alloc, but this will attempt to free it
00885          * using ngs->chan_alloc, which will not work.  Therefore we
00886          * free it manually and move the list forward before handing
00887          * it off. */
00888         rhmm = (root_chan_t *)ngs->word_chan[wid];
00889         thmm = rhmm->next;
00890         listelem_free(ngs->root_chan_alloc, rhmm);
00891         ngs->word_chan[wid] = thmm;
00892         ngram_search_free_all_rc(ngs, wid);
00893     }
00894 }
00895 
00896 void
00897 ngram_fwdflat_finish(ngram_search_t *ngs)
00898 {
00899     int32 cf;
00900 
00901     destroy_fwdflat_chan(ngs);
00902     destroy_fwdflat_wordlist(ngs);
00903     bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
00904 
00905     /* This is the number of frames processed. */
00906     cf = ps_search_acmod(ngs)->output_frame;
00907     /* Add a mark in the backpointer table for one past the final frame. */
00908     ngram_search_mark_bptable(ngs, cf);
00909 
00910     /* Print out some statistics. */
00911     if (cf > 0) {
00912         E_INFO("%8d words recognized (%d/fr)\n",
00913                ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1));
00914         E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt,
00915                (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1));
00916         E_INFO("%8d channels searched (%d/fr)\n",
00917                ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1));
00918         E_INFO("%8d words searched (%d/fr)\n",
00919                ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1));
00920         E_INFO("%8d word transitions (%d/fr)\n",
00921                ngs->st.n_fwdflat_word_transition,
00922                ngs->st.n_fwdflat_word_transition / (cf + 1));
00923     }
00924 }

Generated on Sat Jan 8 2011 for PocketSphinx by  doxygen 1.7.1