PocketSphinx  0.6
ngram_search.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2008 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
42 /* System headers. */
43 #include <string.h>
44 #include <assert.h>
45 
46 /* SphinxBase headers. */
47 #include <sphinxbase/ckd_alloc.h>
48 #include <sphinxbase/listelem_alloc.h>
49 #include <sphinxbase/err.h>
50 
51 /* Local headers. */
52 #include "pocketsphinx_internal.h"
53 #include "ps_lattice_internal.h"
54 #include "ngram_search.h"
55 #include "ngram_search_fwdtree.h"
56 #include "ngram_search_fwdflat.h"
57 
58 static int ngram_search_start(ps_search_t *search);
59 static int ngram_search_step(ps_search_t *search, int frame_idx);
60 static int ngram_search_finish(ps_search_t *search);
61 static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
62 static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score);
63 static int32 ngram_search_prob(ps_search_t *search);
64 static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
65 
66 static ps_searchfuncs_t ngram_funcs = {
67  /* name: */ "ngram",
68  /* start: */ ngram_search_start,
69  /* step: */ ngram_search_step,
70  /* finish: */ ngram_search_finish,
71  /* reinit: */ ngram_search_reinit,
72  /* free: */ ngram_search_free,
73  /* lattice: */ ngram_search_lattice,
74  /* hyp: */ ngram_search_hyp,
75  /* prob: */ ngram_search_prob,
76  /* seg_iter: */ ngram_search_seg_iter,
77 };
78 
79 static void
80 ngram_search_update_widmap(ngram_search_t *ngs)
81 {
82  const char **words;
83  int32 i, n_words;
84 
85  /* It's okay to include fillers since they won't be in the LM */
86  n_words = ps_search_n_words(ngs);
87  words = ckd_calloc(n_words, sizeof(*words));
88  /* This will include alternates, again, that's okay since they aren't in the LM */
89  for (i = 0; i < n_words; ++i)
90  words[i] = (const char *)dict_wordstr(ps_search_dict(ngs), i);
91  ngram_model_set_map_words(ngs->lmset, words, n_words);
92  ckd_free(words);
93 }
94 
95 static void
96 ngram_search_calc_beams(ngram_search_t *ngs)
97 {
98  cmd_ln_t *config;
99  acmod_t *acmod;
100 
101  config = ps_search_config(ngs);
102  acmod = ps_search_acmod(ngs);
103 
104  /* Log beam widths. */
105  ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
106  ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
107  ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
108  ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
109  ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
110  ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
111  ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
112 
113  /* Absolute pruning parameters. */
114  ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
115  ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
116 
117  /* Various penalties which may or may not be useful. */
118  ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
119  ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
120  ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
121  ngs->silpen = ngs->pip
122  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
123  ngs->fillpen = ngs->pip
124  + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
125 
126  /* Language weight ratios for fwdflat and bestpath search. */
127  ngs->fwdflat_fwdtree_lw_ratio =
128  cmd_ln_float32_r(config, "-fwdflatlw")
129  / cmd_ln_float32_r(config, "-lw");
130  ngs->bestpath_fwdtree_lw_ratio =
131  cmd_ln_float32_r(config, "-bestpathlw")
132  / cmd_ln_float32_r(config, "-lw");
133 
134  /* Acoustic score scale for posterior probabilities. */
135  ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
136 }
137 
138 ps_search_t *
139 ngram_search_init(cmd_ln_t *config,
140  acmod_t *acmod,
141  dict_t *dict,
142  dict2pid_t *d2p)
143 {
144  ngram_search_t *ngs;
145  const char *path;
146 
147  ngs = ckd_calloc(1, sizeof(*ngs));
148  ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
149  ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
150  acmod->tmat->tp, NULL, acmod->mdef->sseq);
151  if (ngs->hmmctx == NULL) {
152  ps_search_free(ps_search_base(ngs));
153  return NULL;
154  }
155  ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
156  ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
157  ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
158 
159  /* Calculate various beam widths and such. */
160  ngram_search_calc_beams(ngs);
161 
162  /* Allocate a billion different tables for stuff. */
163  ngs->word_chan = ckd_calloc(dict_size(dict),
164  sizeof(*ngs->word_chan));
165  ngs->word_lat_idx = ckd_calloc(dict_size(dict),
166  sizeof(*ngs->word_lat_idx));
167  ngs->word_active = bitvec_alloc(dict_size(dict));
168  ngs->last_ltrans = ckd_calloc(dict_size(dict),
169  sizeof(*ngs->last_ltrans));
170 
171  /* FIXME: All these structures need to be made dynamic with
172  * garbage collection. */
173  ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
174  ngs->bp_table = ckd_calloc(ngs->bp_table_size,
175  sizeof(*ngs->bp_table));
176  /* FIXME: This thing is frickin' huge. */
177  ngs->bscore_stack_size = ngs->bp_table_size * 20;
178  ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
179  sizeof(*ngs->bscore_stack));
180  ngs->n_frame_alloc = 256;
181  ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
182  sizeof(*ngs->bp_table_idx));
183  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
184 
185  /* Allocate active word list array */
186  ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
187  sizeof(**ngs->active_word_list));
188 
189  /* Load language model(s) */
190  if ((path = cmd_ln_str_r(config, "-lmctl"))) {
191  ngs->lmset = ngram_model_set_read(config, path, acmod->lmath);
192  if (ngs->lmset == NULL) {
193  E_ERROR("Failed to read language model control file: %s\n",
194  path);
195  goto error_out;
196  }
197  /* Set the default language model if needed. */
198  if ((path = cmd_ln_str_r(config, "-lmname"))) {
199  ngram_model_set_select(ngs->lmset, path);
200  }
201  }
202  else if ((path = cmd_ln_str_r(config, "-lm"))) {
203  static const char *name = "default";
204  ngram_model_t *lm;
205 
206  lm = ngram_model_read(config, path, NGRAM_AUTO, acmod->lmath);
207  if (lm == NULL) {
208  E_ERROR("Failed to read language model file: %s\n", path);
209  goto error_out;
210  }
211  ngs->lmset = ngram_model_set_init(config,
212  &lm, (char **)&name,
213  NULL, 1);
214  if (ngs->lmset == NULL) {
215  E_ERROR("Failed to initialize language model set\n");
216  goto error_out;
217  }
218  }
219  if (ngs->lmset != NULL
220  && ngram_wid(ngs->lmset, S3_FINISH_WORD) == ngram_unknown_wid(ngs->lmset)) {
221  E_ERROR("Language model/set does not contain </s>, recognition will fail\n");
222  goto error_out;
223  }
224 
225  /* Create word mappings. */
226  ngram_search_update_widmap(ngs);
227 
228  /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
229  if (cmd_ln_boolean_r(config, "-fwdtree")) {
230  ngram_fwdtree_init(ngs);
231  ngs->fwdtree = TRUE;
232  ngs->fwdtree_perf.name = "fwdtree";
233  ptmr_init(&ngs->fwdtree_perf);
234  }
235  if (cmd_ln_boolean_r(config, "-fwdflat")) {
236  ngram_fwdflat_init(ngs);
237  ngs->fwdflat = TRUE;
238  ngs->fwdflat_perf.name = "fwdflat";
239  ptmr_init(&ngs->fwdflat_perf);
240  }
241  if (cmd_ln_boolean_r(config, "-bestpath")) {
242  ngs->bestpath = TRUE;
243  ngs->bestpath_perf.name = "bestpath";
244  ptmr_init(&ngs->bestpath_perf);
245  }
246 
247  return (ps_search_t *)ngs;
248 
249 error_out:
251  return NULL;
252 }
253 
254 static int
255 ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
256 {
257  ngram_search_t *ngs = (ngram_search_t *)search;
258  int old_n_words;
259  int rv = 0;
260 
261  /* Update the number of words. */
262  old_n_words = search->n_words;
263  if (old_n_words != dict_size(dict)) {
264  search->n_words = dict_size(dict);
265  /* Reallocate these temporary arrays. */
266  ckd_free(ngs->word_lat_idx);
267  ckd_free(ngs->word_active);
268  ckd_free(ngs->last_ltrans);
269  ckd_free_2d(ngs->active_word_list);
270  ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
271  ngs->word_active = bitvec_alloc(search->n_words);
272  ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
273  ngs->active_word_list
274  = ckd_calloc_2d(2, search->n_words,
275  sizeof(**ngs->active_word_list));
276  }
277 
278  /* Free old dict2pid, dict */
279  ps_search_base_reinit(search, dict, d2p);
280 
281  if (ngs->lmset == NULL)
282  return;
283 
284  /* Update beam widths. */
285  ngram_search_calc_beams(ngs);
286 
287  /* Update word mappings. */
288  ngram_search_update_widmap(ngs);
289 
290  /* Now rebuild lextrees. */
291  if (ngs->fwdtree) {
292  if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
293  return rv;
294  }
295  if (ngs->fwdflat) {
296  if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
297  return rv;
298  }
299 
300  return rv;
301 }
302 
303 void
305 {
306  ngram_search_t *ngs = (ngram_search_t *)search;
307 
308  ps_search_deinit(search);
309  if (ngs->fwdtree)
311  if (ngs->fwdflat)
313  if (ngs->bestpath) {
314  double n_speech = (double)ngs->n_tot_frame
315  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
316 
317  E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
318  ngs->bestpath_perf.t_tot_cpu,
319  ngs->bestpath_perf.t_tot_cpu / n_speech);
320  E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
321  ngs->bestpath_perf.t_tot_elapsed,
322  ngs->bestpath_perf.t_tot_elapsed / n_speech);
323  }
324 
325  hmm_context_free(ngs->hmmctx);
326  listelem_alloc_free(ngs->chan_alloc);
327  listelem_alloc_free(ngs->root_chan_alloc);
328  listelem_alloc_free(ngs->latnode_alloc);
329  ngram_model_free(ngs->lmset);
330 
331  ckd_free(ngs->word_chan);
332  ckd_free(ngs->word_lat_idx);
333  bitvec_free(ngs->word_active);
334  ckd_free(ngs->bp_table);
335  ckd_free(ngs->bscore_stack);
336  if (ngs->bp_table_idx != NULL)
337  ckd_free(ngs->bp_table_idx - 1);
338  ckd_free_2d(ngs->active_word_list);
339  ckd_free(ngs->last_ltrans);
340  ckd_free(ngs);
341 }
342 
343 int
345 {
346  if (frame_idx >= ngs->n_frame_alloc) {
347  ngs->n_frame_alloc *= 2;
348  ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
349  (ngs->n_frame_alloc + 1)
350  * sizeof(*ngs->bp_table_idx));
351  if (ngs->frm_wordlist) {
352  ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
353  ngs->n_frame_alloc
354  * sizeof(*ngs->frm_wordlist));
355  }
356  ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
357  }
358  ngs->bp_table_idx[frame_idx] = ngs->bpidx;
359  return ngs->bpidx;
360 }
361 
362 static void
363 set_real_wid(ngram_search_t *ngs, int32 bp)
364 {
365  bptbl_t *ent, *prev;
366 
367  assert(bp != NO_BP);
368  ent = ngs->bp_table + bp;
369  if (ent->bp == NO_BP)
370  prev = NULL;
371  else
372  prev = ngs->bp_table + ent->bp;
373 
374  /* Propagate lm state for fillers, rotate it for words. */
375  if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
376  if (prev != NULL) {
377  ent->real_wid = prev->real_wid;
378  ent->prev_real_wid = prev->prev_real_wid;
379  }
380  else {
381  ent->real_wid = dict_basewid(ps_search_dict(ngs),
382  ent->wid);
383  ent->prev_real_wid = BAD_S3WID;
384  }
385  }
386  else {
387  ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
388  if (prev != NULL)
389  ent->prev_real_wid = prev->real_wid;
390  else
391  ent->prev_real_wid = BAD_S3WID;
392  }
393 }
394 
395 void
397  int32 w, int32 score, int32 path, int32 rc)
398 {
399  int32 bp;
400 
401  /* Look for an existing exit for this word in this frame. The
402  * only reason one would exist is from a different right context
403  * triphone, but of course that happens quite frequently. */
404  bp = ngs->word_lat_idx[w];
405  if (bp != NO_BP) {
406  /* Keep only the best scoring one, we will reconstruct the
407  * others from the right context scores - usually the history
408  * is not lost. */
409  if (ngs->bp_table[bp].score WORSE_THAN score) {
410  assert(path != bp); /* Pathological. */
411  if (ngs->bp_table[bp].bp != path) {
412  int32 bplh[2], newlh[2];
413  /* But, sometimes, the history *is* lost. If we wanted to
414  * do exact language model scoring we'd have to preserve
415  * these alternate histories. */
416  E_DEBUG(2,("Updating path history %d => %d frame %d\n",
417  ngs->bp_table[bp].bp, path, frame_idx));
418  bplh[0] = ngs->bp_table[bp].bp == -1
419  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
420  bplh[1] = ngs->bp_table[bp].bp == -1
421  ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
422  newlh[0] = path == -1
423  ? -1 : ngs->bp_table[path].prev_real_wid;
424  newlh[1] = path == -1
425  ? -1 : ngs->bp_table[path].real_wid;
426  /* Actually it's worth checking how often the actual
427  * language model state changes. */
428  if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
429  /* It's fairly rare that the actual language model
430  * state changes, but it does happen some
431  * times. */
432  E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
433  dict_wordstr(ps_search_dict(ngs), bplh[0]),
434  dict_wordstr(ps_search_dict(ngs), bplh[1]),
435  dict_wordstr(ps_search_dict(ngs), newlh[0]),
436  dict_wordstr(ps_search_dict(ngs), newlh[1]),
437  frame_idx));
438  set_real_wid(ngs, bp);
439  }
440  ngs->bp_table[bp].bp = path;
441  }
442  ngs->bp_table[bp].score = score;
443  }
444  /* But do keep track of scores for all right contexts, since
445  * we need them to determine the starting path scores for any
446  * successors of this word exit. */
447  if (ngs->bp_table[bp].s_idx != -1)
448  ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
449  }
450  else {
451  int32 i, rcsize;
452  bptbl_t *be;
453 
454  /* This might happen if recognition fails. */
455  if (ngs->bpidx == NO_BP) {
456  E_ERROR("No entries in backpointer table!");
457  return;
458  }
459 
460  /* Expand the backpointer tables if necessary. */
461  if (ngs->bpidx >= ngs->bp_table_size) {
462  ngs->bp_table_size *= 2;
463  ngs->bp_table = ckd_realloc(ngs->bp_table,
464  ngs->bp_table_size
465  * sizeof(*ngs->bp_table));
466  E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
467  }
468  if (ngs->bss_head >= ngs->bscore_stack_size
469  - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
470  ngs->bscore_stack_size *= 2;
471  ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
472  ngs->bscore_stack_size
473  * sizeof(*ngs->bscore_stack));
474  E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
475  }
476 
477  ngs->word_lat_idx[w] = ngs->bpidx;
478  be = &(ngs->bp_table[ngs->bpidx]);
479  be->wid = w;
480  be->frame = frame_idx;
481  be->bp = path;
482  be->score = score;
483  be->s_idx = ngs->bss_head;
484  be->valid = TRUE;
485  assert(path != ngs->bpidx);
486 
487  /* DICT2PID */
488  /* Get diphone ID for final phone and number of ssids corresponding to it. */
489  be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
490  if (dict_is_single_phone(ps_search_dict(ngs), w)) {
491  be->last2_phone = -1;
492  be->s_idx = -1;
493  rcsize = 0;
494  }
495  else {
496  be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
497  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
498  be->last_phone, be->last2_phone)->n_ssid;
499  }
500  /* Allocate some space on the bscore_stack for all of these triphones. */
501  for (i = 0; i < rcsize; ++i)
502  ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
503  if (rcsize)
504  ngs->bscore_stack[ngs->bss_head + rc] = score;
505  set_real_wid(ngs, ngs->bpidx);
506 
507  ngs->bpidx++;
508  ngs->bss_head += rcsize;
509  }
510 }
511 
512 int
513 ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
514 {
515  /* End of backpointers for this frame. */
516  int end_bpidx;
517  int best_exit, bp;
518  int32 best_score;
519 
520  /* No hypothesis means no exit node! */
521  if (ngs->n_frame == 0)
522  return NO_BP;
523 
524  if (frame_idx == -1 || frame_idx >= ngs->n_frame)
525  frame_idx = ngs->n_frame - 1;
526  end_bpidx = ngs->bp_table_idx[frame_idx];
527 
528  best_score = WORST_SCORE;
529  best_exit = NO_BP;
530 
531  /* Scan back to find a frame with some backpointers in it. */
532  while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
533  --frame_idx;
534  /* This is NOT an error, it just means there is no hypothesis yet. */
535  if (frame_idx < 0)
536  return NO_BP;
537 
538  /* Now find the entry for </s> OR the best scoring entry. */
539  assert(end_bpidx < ngs->bp_table_size);
540  for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
541  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
542  || ngs->bp_table[bp].score BETTER_THAN best_score) {
543  best_score = ngs->bp_table[bp].score;
544  best_exit = bp;
545  }
546  if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
547  break;
548  }
549 
550  if (out_best_score) *out_best_score = best_score;
551  return best_exit;
552 }
553 
554 char const *
556 {
557  ps_search_t *base = ps_search_base(ngs);
558  char *c;
559  size_t len;
560  int bp;
561 
562  if (bpidx == NO_BP)
563  return NULL;
564 
565  bp = bpidx;
566  len = 0;
567  while (bp != NO_BP) {
568  bptbl_t *be = &ngs->bp_table[bp];
569  bp = be->bp;
570  if (dict_real_word(ps_search_dict(ngs), be->wid))
571  len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
572  }
573 
574  ckd_free(base->hyp_str);
575  if (len == 0) {
576  base->hyp_str = NULL;
577  return base->hyp_str;
578  }
579  base->hyp_str = ckd_calloc(1, len);
580 
581  bp = bpidx;
582  c = base->hyp_str + len - 1;
583  while (bp != NO_BP) {
584  bptbl_t *be = &ngs->bp_table[bp];
585  size_t len;
586 
587  bp = be->bp;
588  if (dict_real_word(ps_search_dict(ngs), be->wid)) {
589  len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
590  c -= len;
591  memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
592  if (c > base->hyp_str) {
593  --c;
594  *c = ' ';
595  }
596  }
597  }
598 
599  return base->hyp_str;
600 }
601 
602 void
604 {
605  chan_t *hmm, *thmm;
606  xwdssid_t *rssid;
607  int32 i, tmatid, ciphone;
608 
609  /* DICT2PID */
610  /* Get pointer to array of triphones for final diphone. */
611  assert(!dict_is_single_phone(ps_search_dict(ngs), w));
612  ciphone = dict_last_phone(ps_search_dict(ngs),w);
613  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
614  ciphone,
615  dict_second_last_phone(ps_search_dict(ngs),w));
616  tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
617  hmm = ngs->word_chan[w];
618  if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
619  hmm = listelem_malloc(ngs->chan_alloc);
620  hmm->next = ngs->word_chan[w];
621  ngs->word_chan[w] = hmm;
622 
623  hmm->info.rc_id = 0;
624  hmm->ciphone = ciphone;
625  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
626  E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
627  rssid->ssid[0], hmm->ciphone,
628  dict_second_last_phone(ps_search_dict(ngs),w),
629  dict_wordstr(ps_search_dict(ngs),w)));
630  }
631  for (i = 1; i < rssid->n_ssid; ++i) {
632  if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
633  thmm = listelem_malloc(ngs->chan_alloc);
634  thmm->next = hmm->next;
635  hmm->next = thmm;
636  hmm = thmm;
637 
638  hmm->info.rc_id = i;
639  hmm->ciphone = ciphone;
640  hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
641  E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
642  i, rssid->ssid[i], hmm->ciphone,
643  dict_second_last_phone(ps_search_dict(ngs),w),
644  dict_wordstr(ps_search_dict(ngs),w)));
645  }
646  else
647  hmm = hmm->next;
648  }
649 }
650 
651 void
653 {
654  chan_t *hmm, *thmm;
655 
656  for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
657  thmm = hmm->next;
658  hmm_deinit(&hmm->hmm);
659  listelem_free(ngs->chan_alloc, hmm);
660  }
661  ngs->word_chan[w] = NULL;
662 }
663 
664 int32
666 {
667  /* DICT2PID */
668  /* Get the mapping from right context phone ID to index in the
669  * right context table and the bscore_stack. */
670  if (pbe->last2_phone == -1) {
671  /* No right context for single phone predecessor words. */
672  return pbe->score;
673  }
674  else {
675  xwdssid_t *rssid;
676  /* Find the index for the last diphone of the previous word +
677  * the first phone of the current word. */
678  rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
679  pbe->last_phone, pbe->last2_phone);
680  /* This may be WORST_SCORE, which means that there was no exit
681  * with rcphone as right context. */
682  return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
683  }
684 }
685 
686 /*
687  * Compute acoustic and LM scores for a BPTable entry (segment).
688  */
689 void
690 ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
691  int32 *out_ascr, int32 *out_lscr)
692 {
693  bptbl_t *pbe;
694  int32 start_score;
695 
696  /* Start of utterance. */
697  if (be->bp == NO_BP) {
698  *out_ascr = be->score;
699  *out_lscr = 0;
700  return;
701  }
702 
703  /* Otherwise, calculate lscr and ascr. */
704  pbe = ngs->bp_table + be->bp;
705  start_score = ngram_search_exit_score(ngs, pbe,
706  dict_first_phone(ps_search_dict(ngs),be->wid));
707  assert(start_score BETTER_THAN WORST_SCORE);
708 
709  /* FIXME: These result in positive acoustic scores when filler
710  words have non-filler pronunciations. That whole business
711  is still pretty much broken but at least it doesn't
712  segfault. */
713  if (be->wid == ps_search_silence_wid(ngs)) {
714  *out_lscr = ngs->silpen;
715  }
716  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
717  *out_lscr = ngs->fillpen;
718  }
719  else {
720  int32 n_used;
721  *out_lscr = ngram_tg_score(ngs->lmset,
722  be->real_wid,
723  pbe->real_wid,
724  pbe->prev_real_wid,
725  &n_used)>>SENSCR_SHIFT;
726  *out_lscr = *out_lscr * lwf;
727  }
728  *out_ascr = be->score - start_score - *out_lscr;
729 }
730 
731 static int
732 ngram_search_start(ps_search_t *search)
733 {
734  ngram_search_t *ngs = (ngram_search_t *)search;
735 
736  ngs->done = FALSE;
737  ngram_model_flush(ngs->lmset);
738  if (ngs->fwdtree)
739  ngram_fwdtree_start(ngs);
740  else if (ngs->fwdflat)
741  ngram_fwdflat_start(ngs);
742  else
743  return -1;
744  return 0;
745 }
746 
747 static int
748 ngram_search_step(ps_search_t *search, int frame_idx)
749 {
750  ngram_search_t *ngs = (ngram_search_t *)search;
751 
752  if (ngs->fwdtree)
753  return ngram_fwdtree_search(ngs, frame_idx);
754  else if (ngs->fwdflat)
755  return ngram_fwdflat_search(ngs, frame_idx);
756  else
757  return -1;
758 }
759 
760 void
761 dump_bptable(ngram_search_t *ngs)
762 {
763  int i;
764  E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
765  for (i = 0; i < ngs->bpidx; ++i) {
766  bptbl_t *bpe = ngs->bp_table + i;
767  int j, rcsize;
768 
769  E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
770  i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
771  (bpe->bp == -1
772  ? 0 : ngs->bp_table[bpe->bp].frame + 1),
773  bpe->frame, bpe->score, bpe->bp,
774  bpe->real_wid, bpe->prev_real_wid);
775 
776  if (bpe->last2_phone == -1)
777  rcsize = 0;
778  else
779  rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
780  bpe->last_phone, bpe->last2_phone)->n_ssid;
781  if (rcsize) {
782  E_INFOCONT("\tbss");
783  for (j = 0; j < rcsize; ++j)
784  if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
785  E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
786  }
787  E_INFOCONT("\n");
788  }
789 }
790 
791 static int
792 ngram_search_finish(ps_search_t *search)
793 {
794  ngram_search_t *ngs = (ngram_search_t *)search;
795 
796  ngs->n_tot_frame += ngs->n_frame;
797  if (ngs->fwdtree) {
799  /* dump_bptable(ngs); */
800 
801  /* Now do fwdflat search in its entirety, if requested. */
802  if (ngs->fwdflat) {
803  int i;
804  /* Rewind the acoustic model. */
805  if (acmod_rewind(ps_search_acmod(ngs)) < 0)
806  return -1;
807  /* Now redo search. */
808  ngram_fwdflat_start(ngs);
809  i = 0;
810  while (ps_search_acmod(ngs)->n_feat_frame > 0) {
811  int nfr;
812  if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
813  return nfr;
814  acmod_advance(ps_search_acmod(ngs));
815  ++i;
816  }
818  /* And now, we should have a result... */
819  /* dump_bptable(ngs); */
820  }
821  }
822  else if (ngs->fwdflat) {
824  }
825 
826  /* Mark the current utterance as done. */
827  ngs->done = TRUE;
828  return 0;
829 }
830 
831 static ps_latlink_t *
832 ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
833 {
834  ngram_search_t *ngs = (ngram_search_t *)search;
835 
836  if (search->last_link == NULL) {
837  search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
838  ngs->bestpath_fwdtree_lw_ratio,
839  ngs->ascale);
840  if (search->last_link == NULL)
841  return NULL;
842  /* Also calculate betas so we can fill in the posterior
843  * probability field in the segmentation. */
844  if (search->post == 0)
845  search->post = ps_lattice_posterior(search->dag, ngs->lmset,
846  ngs->ascale);
847  }
848  if (out_score)
849  *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
850  return search->last_link;
851 }
852 
853 static char const *
854 ngram_search_hyp(ps_search_t *search, int32 *out_score)
855 {
856  ngram_search_t *ngs = (ngram_search_t *)search;
857 
858  /* Only do bestpath search if the utterance is complete. */
859  if (ngs->bestpath && ngs->done) {
860  ps_lattice_t *dag;
861  ps_latlink_t *link;
862  char const *hyp;
863  double n_speech;
864 
865  ptmr_reset(&ngs->bestpath_perf);
866  ptmr_start(&ngs->bestpath_perf);
867  if ((dag = ngram_search_lattice(search)) == NULL)
868  return NULL;
869  if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
870  return NULL;
871  hyp = ps_lattice_hyp(dag, link);
872  ptmr_stop(&ngs->bestpath_perf);
873  n_speech = (double)dag->n_frames
874  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
875  E_INFO("bestpath %.2f CPU %.3f xRT\n",
876  ngs->bestpath_perf.t_cpu,
877  ngs->bestpath_perf.t_cpu / n_speech);
878  E_INFO("bestpath %.2f wall %.3f xRT\n",
879  ngs->bestpath_perf.t_elapsed,
880  ngs->bestpath_perf.t_elapsed / n_speech);
881  return hyp;
882  }
883  else {
884  int32 bpidx;
885 
886  /* fwdtree and fwdflat use same backpointer table. */
887  bpidx = ngram_search_find_exit(ngs, -1, out_score);
888  if (bpidx != NO_BP)
889  return ngram_search_bp_hyp(ngs, bpidx);
890  }
891 
892  return NULL;
893 }
894 
895 static void
896 ngram_search_bp2itor(ps_seg_t *seg, int bp)
897 {
898  ngram_search_t *ngs = (ngram_search_t *)seg->search;
899  bptbl_t *be, *pbe;
900 
901  be = &ngs->bp_table[bp];
902  pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
903  seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
904  seg->ef = be->frame;
905  seg->sf = pbe ? pbe->frame + 1 : 0;
906  seg->prob = 0; /* Bogus value... */
907  /* Compute acoustic and LM scores for this segment. */
908  if (pbe == NULL) {
909  seg->ascr = be->score;
910  seg->lscr = 0;
911  seg->lback = 0;
912  }
913  else {
914  int32 start_score;
915 
916  /* Find ending path score of previous word. */
917  start_score = ngram_search_exit_score(ngs, pbe,
918  dict_first_phone(ps_search_dict(ngs), be->wid));
919  assert(start_score BETTER_THAN WORST_SCORE);
920  if (be->wid == ps_search_silence_wid(ngs)) {
921  seg->lscr = ngs->silpen;
922  }
923  else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
924  seg->lscr = ngs->fillpen;
925  }
926  else {
927  seg->lscr = ngram_tg_score(ngs->lmset,
928  be->real_wid,
929  pbe->real_wid,
930  pbe->prev_real_wid,
931  &seg->lback)>>SENSCR_SHIFT;
932  seg->lscr = (int32)(seg->lscr * seg->lwf);
933  }
934  seg->ascr = be->score - start_score - seg->lscr;
935  }
936 }
937 
938 static void
939 ngram_bp_seg_free(ps_seg_t *seg)
940 {
941  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
942 
943  ckd_free(itor->bpidx);
944  ckd_free(itor);
945 }
946 
947 static ps_seg_t *
948 ngram_bp_seg_next(ps_seg_t *seg)
949 {
950  bptbl_seg_t *itor = (bptbl_seg_t *)seg;
951 
952  if (++itor->cur == itor->n_bpidx) {
953  ngram_bp_seg_free(seg);
954  return NULL;
955  }
956 
957  ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
958  return seg;
959 }
960 
961 static ps_segfuncs_t ngram_bp_segfuncs = {
962  /* seg_next */ ngram_bp_seg_next,
963  /* seg_free */ ngram_bp_seg_free
964 };
965 
966 static ps_seg_t *
967 ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
968 {
969  bptbl_seg_t *itor;
970  int bp, cur;
971 
972  /* Calling this an "iterator" is a bit of a misnomer since we have
973  * to get the entire backtrace in order to produce it. On the
974  * other hand, all we actually need is the bptbl IDs, and we can
975  * allocate a fixed-size array of them. */
976  itor = ckd_calloc(1, sizeof(*itor));
977  itor->base.vt = &ngram_bp_segfuncs;
978  itor->base.search = ps_search_base(ngs);
979  itor->base.lwf = lwf;
980  itor->n_bpidx = 0;
981  bp = bpidx;
982  while (bp != NO_BP) {
983  bptbl_t *be = &ngs->bp_table[bp];
984  bp = be->bp;
985  ++itor->n_bpidx;
986  }
987  if (itor->n_bpidx == 0) {
988  ckd_free(itor);
989  return NULL;
990  }
991  itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
992  cur = itor->n_bpidx - 1;
993  bp = bpidx;
994  while (bp != NO_BP) {
995  bptbl_t *be = &ngs->bp_table[bp];
996  itor->bpidx[cur] = bp;
997  bp = be->bp;
998  --cur;
999  }
1000 
1001  /* Fill in relevant fields for first element. */
1002  ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
1003 
1004  return (ps_seg_t *)itor;
1005 }
1006 
1007 static ps_seg_t *
1008 ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
1009 {
1010  ngram_search_t *ngs = (ngram_search_t *)search;
1011 
1012  /* Only do bestpath search if the utterance is done. */
1013  if (ngs->bestpath && ngs->done) {
1014  ps_lattice_t *dag;
1015  ps_latlink_t *link;
1016  double n_speech;
1017  ps_seg_t *itor;
1018 
1019  ptmr_reset(&ngs->bestpath_perf);
1020  ptmr_start(&ngs->bestpath_perf);
1021  if ((dag = ngram_search_lattice(search)) == NULL)
1022  return NULL;
1023  if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
1024  return NULL;
1025  itor = ps_lattice_seg_iter(dag, link,
1026  ngs->bestpath_fwdtree_lw_ratio);
1027  ptmr_stop(&ngs->bestpath_perf);
1028  n_speech = (double)dag->n_frames
1029  / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
1030  E_INFO("bestpath %.2f CPU %.3f xRT\n",
1031  ngs->bestpath_perf.t_cpu,
1032  ngs->bestpath_perf.t_cpu / n_speech);
1033  E_INFO("bestpath %.2f wall %.3f xRT\n",
1034  ngs->bestpath_perf.t_elapsed,
1035  ngs->bestpath_perf.t_elapsed / n_speech);
1036  return itor;
1037  }
1038  else {
1039  int32 bpidx;
1040 
1041  /* fwdtree and fwdflat use same backpointer table. */
1042  bpidx = ngram_search_find_exit(ngs, -1, out_score);
1043  return ngram_search_bp_iter(ngs, bpidx,
1044  /* but different language weights... */
1045  (ngs->done && ngs->fwdflat)
1046  ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1047  }
1048 
1049  return NULL;
1050 }
1051 
1052 static int32
1053 ngram_search_prob(ps_search_t *search)
1054 {
1055  ngram_search_t *ngs = (ngram_search_t *)search;
1056 
1057  /* Only do bestpath search if the utterance is done. */
1058  if (ngs->bestpath && ngs->done) {
1059  ps_lattice_t *dag;
1060  ps_latlink_t *link;
1061 
1062  if ((dag = ngram_search_lattice(search)) == NULL)
1063  return 0;
1064  if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1065  return 0;
1066  return search->post;
1067  }
1068  else {
1069  /* FIXME: Give some kind of good estimate here, eventually. */
1070  return 0;
1071  }
1072 }
1073 
1074 static void
1075 create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
1076 {
1077  bptbl_t *bp_ptr;
1078  int32 i;
1079 
1080  for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1081  int32 sf, ef, wid;
1082  ps_latnode_t *node;
1083 
1084  /* Skip invalid backpointers (these result from -maxwpf pruning) */
1085  if (!bp_ptr->valid)
1086  continue;
1087 
1088  sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1089  ef = bp_ptr->frame;
1090  wid = bp_ptr->wid;
1091 
1092  assert(ef < dag->n_frames);
1093  /* Skip non-final </s> entries. */
1094  if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1095  continue;
1096 
1097  /* Skip if word not in LM */
1098  if ((!dict_filler_word(ps_search_dict(ngs), wid))
1099  && (!ngram_model_set_known_wid(ngs->lmset,
1100  dict_basewid(ps_search_dict(ngs), wid))))
1101  continue;
1102 
1103  /* See if bptbl entry <wid,sf> already in lattice */
1104  for (node = dag->nodes; node; node = node->next) {
1105  if ((node->wid == wid) && (node->sf == sf))
1106  break;
1107  }
1108 
1109  /* For the moment, store bptbl indices in node.{fef,lef} */
1110  if (node)
1111  node->lef = i;
1112  else {
1113  /* New node; link to head of list */
1114  node = listelem_malloc(dag->latnode_alloc);
1115  node->wid = wid;
1116  node->sf = sf; /* This is a frame index. */
1117  node->fef = node->lef = i; /* These are backpointer indices (argh) */
1118  node->reachable = FALSE;
1119  node->entries = NULL;
1120  node->exits = NULL;
1121 
1122  /* NOTE: This creates the list of nodes in reverse
1123  * topological order, i.e. a node always precedes its
1124  * antecedents in this list. */
1125  node->next = dag->nodes;
1126  dag->nodes = node;
1127  ++dag->n_nodes;
1128  }
1129  }
1130 }
1131 
1132 static ps_latnode_t *
1133 find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
1134 {
1135  ps_latnode_t *node;
1136 
1137  /* Find start node <s>.0 */
1138  for (node = dag->nodes; node; node = node->next) {
1139  if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
1140  break;
1141  }
1142  if (!node) {
1143  /* This is probably impossible. */
1144  E_ERROR("Couldn't find <s> in first frame\n");
1145  return NULL;
1146  }
1147  return node;
1148 }
1149 
1150 static ps_latnode_t *
1151 find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
1152 {
1153  ps_latnode_t *node;
1154  int32 ef, bestbp, bp, bestscore;
1155 
1156  /* Find final node </s>.last_frame; nothing can follow this node */
1157  for (node = dag->nodes; node; node = node->next) {
1158  int32 lef = ngs->bp_table[node->lef].frame;
1159  if ((node->wid == ps_search_finish_wid(ngs))
1160  && (lef == dag->n_frames - 1))
1161  break;
1162  }
1163  if (node != NULL)
1164  return node;
1165 
1166  /* It is quite likely that no </s> exited in the last frame. So,
1167  * find the node corresponding to the best exit. */
1168  /* Find the last frame containing a word exit. */
1169  for (ef = dag->n_frames - 1;
1170  ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1171  --ef);
1172  if (ef < 0) {
1173  E_ERROR("Empty backpointer table: can not build DAG.\n");
1174  return NULL;
1175  }
1176 
1177  /* Find best word exit in that frame. */
1178  bestscore = WORST_SCORE;
1179  bestbp = NO_BP;
1180  for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1181  int32 n_used, l_scr, wid, prev_wid;
1182  wid = ngs->bp_table[bp].real_wid;
1183  prev_wid = ngs->bp_table[bp].prev_real_wid;
1184  /* Always prefer </s>, of which there will only be one per frame. */
1185  if (wid == ps_search_finish_wid(ngs)) {
1186  bestbp = bp;
1187  break;
1188  }
1189  l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
1190  wid, prev_wid, &n_used) >>SENSCR_SHIFT;
1191  l_scr = l_scr * lwf;
1192  if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
1193  bestscore = ngs->bp_table[bp].score + l_scr;
1194  bestbp = bp;
1195  }
1196  }
1197  if (bestbp == NO_BP) {
1198  E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
1199  return NULL;
1200  }
1201  E_INFO("</s> not found in last frame, using %s.%d instead\n",
1202  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
1203 
1204  /* Now find the node that corresponds to it. */
1205  for (node = dag->nodes; node; node = node->next) {
1206  if (node->lef == bestbp)
1207  return node;
1208  }
1209 
1210  /* FIXME: This seems to happen a lot! */
1211  E_ERROR("Failed to find DAG node corresponding to %s\n",
1212  dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
1213  return NULL;
1214 }
1215 
1216 /*
1217  * Build lattice from bptable.
1218  */
1219 ps_lattice_t *
1221 {
1222  int32 i, score, ascr, lscr;
1223  ps_latnode_t *node, *from, *to;
1224  ngram_search_t *ngs;
1225  ps_lattice_t *dag;
1226  int min_endfr, nlink;
1227  float lwf;
1228 
1229  ngs = (ngram_search_t *)search;
1230  min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
1231 
1232  /* If the best score is WORST_SCORE or worse, there is no way to
1233  * make a lattice. */
1235  return NULL;
1236 
1237  /* Check to see if a lattice has previously been created over the
1238  * same number of frames, and reuse it if so. */
1239  if (search->dag && search->dag->n_frames == ngs->n_frame)
1240  return search->dag;
1241 
1242  /* Nope, create a new one. */
1243  ps_lattice_free(search->dag);
1244  search->dag = NULL;
1245  dag = ps_lattice_init_search(search, ngs->n_frame);
1246  /* Compute these such that they agree with the fwdtree language weight. */
1247  lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1248  create_dag_nodes(ngs, dag);
1249  if ((dag->start = find_start_node(ngs, dag)) == NULL)
1250  goto error_out;
1251  if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1252  goto error_out;
1253  E_INFO("lattice start node %s.%d end node %s.%d\n",
1254  dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
1255  dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
1256 
1257  ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
1258  &dag->final_node_ascr, &lscr);
1259 
1260  /*
1261  * At this point, dag->nodes is ordered such that nodes earlier in
1262  * the list can follow (in time) those later in the list, but not
1263  * vice versa (see above - also note that adjacency is purely
1264  * determined by time which is why we can make this claim). Now
1265  * create precedence links and simultanesously mark all nodes that
1266  * can reach dag->end. (All nodes are reached from dag->start
1267  * simply by definition - they were created that way).
1268  *
1269  * Note that this also means that any nodes before dag->end in the
1270  * list can be discarded, meaning that dag->end will always be
1271  * equal to dag->nodes (FIXME: except when loading from a file but
1272  * we can fix that...)
1273  */
1274  i = 0;
1275  while (dag->nodes && dag->nodes != dag->end) {
1276  ps_latnode_t *next = dag->nodes->next;
1277  listelem_free(dag->latnode_alloc, dag->nodes);
1278  dag->nodes = next;
1279  ++i;
1280  }
1281  E_INFO("Eliminated %d nodes before end node\n", i);
1282  dag->end->reachable = TRUE;
1283  nlink = 0;
1284  for (to = dag->end; to; to = to->next) {
1285  int fef, lef;
1286 
1287  /* Skip if not reachable; it will never be reachable from dag->end */
1288  if (!to->reachable)
1289  continue;
1290 
1291  /* Prune nodes with too few endpoints - heuristic
1292  borrowed from Sphinx3 */
1293  fef = ngs->bp_table[to->fef].frame;
1294  lef = ngs->bp_table[to->lef].frame;
1295  if (to != dag->end && lef - fef < min_endfr) {
1296  to->reachable = FALSE;
1297  continue;
1298  }
1299 
1300  /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
1301  for (from = to->next; from; from = from->next) {
1302  bptbl_t *from_bpe;
1303 
1304  fef = ngs->bp_table[from->fef].frame;
1305  lef = ngs->bp_table[from->lef].frame;
1306 
1307  if ((to->sf <= fef) || (to->sf > lef + 1))
1308  continue;
1309  if (lef - fef < min_endfr) {
1310  assert(!from->reachable);
1311  continue;
1312  }
1313 
1314  /* Find bptable entry for "from" that exactly precedes "to" */
1315  i = from->fef;
1316  from_bpe = ngs->bp_table + i;
1317  for (; i <= from->lef; i++, from_bpe++) {
1318  if (from_bpe->wid != from->wid)
1319  continue;
1320  if (from_bpe->frame >= to->sf - 1)
1321  break;
1322  }
1323 
1324  if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
1325  continue;
1326 
1327  /* Find acoustic score from.sf->to.sf-1 with right context = to */
1328  /* This gives us from_bpe's best acoustic score. */
1329  ngram_compute_seg_score(ngs, from_bpe, lwf,
1330  &ascr, &lscr);
1331  /* Now find the exact path score for from->to, including
1332  * the appropriate final triphone. In fact this might not
1333  * exist. */
1334  score = ngram_search_exit_score(ngs, from_bpe,
1335  dict_first_phone(ps_search_dict(ngs), to->wid));
1336  /* Does not exist. Can't create a link here. */
1337  if (score == WORST_SCORE)
1338  continue;
1339  /* Adjust the arc score to match the correct triphone. */
1340  else
1341  score = ascr + (score - from_bpe->score);
1342  if (score BETTER_THAN 0) {
1343  /* Scores must be negative, or Bad Things will happen.
1344  In general, they are, except in corner cases
1345  involving filler words. We don't want to throw any
1346  links away so we'll keep these, but with some
1347  arbitrarily improbable but recognizable score. */
1348  ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
1349  ++nlink;
1350  from->reachable = TRUE;
1351  }
1352  else if (score BETTER_THAN WORST_SCORE) {
1353  ps_lattice_link(dag, from, to, score, from_bpe->frame);
1354  ++nlink;
1355  from->reachable = TRUE;
1356  }
1357  }
1358  }
1359 
1360  /* There must be at least one path between dag->start and dag->end */
1361  if (!dag->start->reachable) {
1362  E_ERROR("End node of lattice isolated; unreachable\n");
1363  goto error_out;
1364  }
1365 
1366  for (node = dag->nodes; node; node = node->next) {
1367  /* Change node->{fef,lef} from bptbl indices to frames. */
1368  node->fef = ngs->bp_table[node->fef].frame;
1369  node->lef = ngs->bp_table[node->lef].frame;
1370  /* Find base wid for nodes. */
1371  node->basewid = dict_basewid(search->dict, node->wid);
1372  }
1373 
1374  /* Link nodes with alternate pronunciations at the same timepoint. */
1375  for (node = dag->nodes; node; node = node->next) {
1376  ps_latnode_t *alt;
1377  /* Scan forward to find the next alternate, then stop. */
1378  for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
1379  if (alt->basewid == node->basewid) {
1380  alt->alt = node->alt;
1381  node->alt = alt;
1382  break;
1383  }
1384  }
1385  }
1386  E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
1387 
1388  /* Minor hack: If the final node is a filler word and not </s>,
1389  * then set its base word ID to </s>, so that the language model
1390  * scores won't be screwed up. */
1391  if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
1392  dag->end->basewid = ps_search_finish_wid(ngs);
1393 
1394  /* Free nodes unreachable from dag->end and their links */
1396 
1397  /* Build links around silence and filler words, since they do not
1398  * exist in the language model. */
1399  ps_lattice_bypass_fillers(dag, ngs->silpen, ngs->fillpen);
1400 
1401  search->dag = dag;
1402  return dag;
1403 
1404 error_out:
1405  ps_lattice_free(dag);
1406  return NULL;
1407 }