dag.h
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1995-2004 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 /*
38  * dag.h -- Library for DAG
39  *
40  * **********************************************
41  * CMU ARPA Speech Project
42  *
43  * Copyright (c) 1996 Carnegie Mellon University.
44  * ALL RIGHTS RESERVED.
45  * **********************************************
46  *
47  * HISTORY
48  *
49  * $Log: dag.h,v $
50  * Revision 1.2 2006/02/23 05:22:32 arthchan2003
51  * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: 1, Fixed bugs from last check in, lw should be * instead of +, 2, Moved most of the functions from flat_fwd.c and s3_dag.c to here. Things that required specified will be prefixed.
52  *
53  * Revision 1.1.4.5 2005/11/17 06:25:04 arthchan2003
54  * 1, Added structure to record node-based ascr and lscr. 2, Added a version of dag_link that copies the language model score as well.
55  *
56  * Revision 1.1.4.4 2005/09/25 19:20:43 arthchan2003
57  * Added hooks in dag_node and dag_link. We will probably need some time to use them in various routines of ours.
58  *
59  * Revision 1.1.4.3 2005/09/11 23:07:28 arthchan2003
60  * srch.c now supports lattice rescoring by rereading the generated lattice from a file. When this is in operation, silence cannot be unlinked from the dictionary. This is a hack and it is reflected in the code of dag, kbcore and srch.
61  *
62  * Revision 1.1.4.2 2005/09/11 02:56:47 arthchan2003
63  * Log. Incorporated all dag related functions from s3_dag.c and
64  * flat_fwd.c. dag_search, dag_add_fudge, dag_remove_filler is now
65  * shared by dag and decode_anytopo. (Hurray!). s3_astar.c still has
66  * special functions and it probably unavoidable.
67  *
68  * Revision 1.1.4.1 2005/07/17 05:44:31 arthchan2003
69  * Added dag_write_header so that DAG header writer could be shared between 3.x and 3.0. However, because the backtrack pointer structure is different in 3.x and 3.0. The DAG writer still can't be shared yet.
70  *
71  * Revision 1.1 2005/06/21 22:37:47 arthchan2003
72  * Build a stand-alone wrapper for directed acyclic graphs; it is now shared across dag/astar and decode_anytopo. This eliminates about 500 lines of code in decode_anytopo/dag and astar. However, its existence still can't eliminate the code duplication between dag/decode_anytopo. That effectively means we have much refactoring to do. Things that are still pretty difficult to merge include dag_search (decode_anytopo/dag) and dag_read (dag/astar).
73  *
74  * Revision 1.2 2005/06/03 06:45:28 archan
75  * 1, Fixed compilation of dag_destroy, dag_dump and dag_build. 2, Changed RARG to REQARG.
76  *
77  * Revision 1.1 2005/06/03 05:46:19 archan
78  * Refactoring across dag/astar/decode_anytopo. Code is not fully tested.
79  * There are several changes I have done to refactor the code across
80  * dag/astar/decode_anytopo. A new library called dag.c is now created
81  * to include all routines that are shared by the three applications that
82  * required graph operations.
83  * 1, dag_link is now shared between dag and decode_anytopo. Unfortunately, astar was using a slightly different version of dag_link. At this point, I could only rename astar's dag_link to astar_dag_link.
84  * 2, dag_update_link is shared by both dag and decode_anytopo.
85  * 3, hyp_free is now shared by misc.c, dag and decode_anytopo
86  * 4, filler_word will not exist anymore, dict_filler_word was used instead.
87  * 5, dag_param_read were shared by both dag and astar.
88  * 6, dag_destroy are now shared by dag/astar/decode_anytopo. Though for some reasons, even the function was not called properly, it is still compiled in linux. There must be something wrong at this point.
89  * 7, dag_bestpath and dag_backtrack are now shared by dag and decode_anytopo. One important thing to notice here is that decode_anytopo's version of the two functions actually multiply the LM score or filler penalty by the language weight. At this point, s3_dag is always using lwf=1.
90  * 8, dag_chk_linkscr is shared by dag and decode_anytopo.
91  * 9, decode_anytopo now supports another three options: -maxedge, -maxlmop and -maxlpf. Their usage is similar to what one could find in dag.
92  *
93  * Notice that the code of the best path search in dag and that of 2-nd
94  * stage of decode_anytopo could still have some differences. It could
95  * be the subtle difference of handling of the option -fudge. I am yet
96  * to know what the true cause is.
97  *
98  * Some other small changes include
99  * -removal of startwid and finishwid as static variables in s3_dag.c. dict.c now hides these two variables.
100  *
101  * There are functions I want to merge but I couldn't and it will be
102  * important to say the reasons.
103  * i, dag_remove_filler_nodes. The version in dag and decode_anytopo
104  * work slightly differently. The decode_anytopo's one attached a dummy
105  * predecessor after removal of the filler nodes.
106  * ii, dag_search (s3dag_dag_search and s3flat_fwd_dag_search). The handling of fudge is different. Also, decode_anytopo's version now depends on the variable lattice.
107  * iii, dag_load (s3dag_dag_load and s3astar_dag_load). astar and dag seem to work slightly differently: one requires removal of the arcs, the other requires bypassing the arcs. I don't understand them yet.
108  * iv, dag_dump, it depends on the variable lattice.
109  *
110  */
111 
112 #ifndef _LIBFBS_DAG_H_
113 #define _LIBFBS_DAG_H_
114 
115 #include <stdio.h>
116 
117 #include <listelem_alloc.h>
118 #include <s3types.h>
119 #include <cmd_ln.h>
120 #include <logmath.h>
121 
122 #include "search.h"
123 #include "dict.h"
124 #include "lm.h"
125 #include "fillpen.h"
126 
127 
128 #ifdef __cplusplus
129 extern "C" {
130 #endif
131 #if 0
132 /* Fool Emacs. */
133 }
134 #endif
135 
/*
 * Integer codes naming two lattice formats (Sphinx = 0, IBM = 1).
 * NOTE(review): which routines consume these codes is not visible in this
 * header -- confirm against the callers.
 */
136 #define SPHINX_LATTICE_FORMAT 0
137 #define IBM_LATTICE_FORMAT 1
138 
139 
/*
 * Per-node record of the DAG. Its succlist/predlist members point at
 * daglink_s records.
 * NOTE(review): this listing jumps over source lines 149 and 151-153, so
 * members may be missing from the view shown here -- confirm against the
 * original header before relying on the layout.
 */
148 typedef struct dagnode_s {
150  int32 seqid; /* presumably a sequence number identifying the node -- TODO confirm */
154  struct daglink_s *succlist; /* head pointer to daglink_s records; by name, the links to successor nodes -- TODO confirm */
155  struct daglink_s *predlist; /* head pointer to daglink_s records; by name, the links to predecessor nodes -- TODO confirm */
156  int32 node_ascr; /* node-based "ascr" (the history log mentions recording node-based ascr and lscr) */
157  int32 node_lscr; /* node-based "lscr" (see node_ascr) */
158  void *hook; /* untyped hook pointer; its use is not defined in this header */
159  uint8 reachable; /* flag; presumably consulted by dag_remove_unreachable() -- TODO confirm */
163 } dagnode_t;
164 
/*
 * Per-link (edge) record of the DAG. Links chain to other links through
 * the next/history/bypass pointers and carry several int32 scores.
 * NOTE(review): this listing jumps over several source lines (171-173,
 * 177-179, ...), so members may be missing from the view shown here --
 * confirm against the original header.
 */
170 typedef struct daglink_s {
174  struct daglink_s *next; /* next link record; presumably the next entry in a node's succlist/predlist -- TODO confirm */
175  struct daglink_s *history; /* another link record; presumably the preceding link on a path -- TODO confirm */
176  struct daglink_s *bypass; /* another link record; presumably related to filler bypassing (cf. dag_bypass_filler_nodes) -- TODO confirm */
180  int32 ascr; /* "ascr" score; same name as the ascr argument of dag_link() */
183  int32 lscr; /* "lscr" score; the history log ties dag_link's lscr to the language model score */
184  int32 pscr; /* "pscr" score; meaning not established in this header -- TODO confirm */
185  int32 hscr; /* "hscr" score; presumably filled in by dag_compute_hscr() -- TODO confirm */
190  int16 pscr_valid; /* flag; by name, whether pscr currently holds a valid value -- TODO confirm */
192  void *hook; /* untyped hook pointer; its use is not defined in this header */
194 } daglink_t;
195 
/*
 * Top-level DAG object: counters, limits, allocators and the configuration
 * and log-math handles passed to dag_init()/dag_load().
 * NOTE(review): this listing jumps over several source lines (205-209,
 * 211-213, ...), so members may be missing from the view shown here --
 * confirm against the original header.
 */
204 typedef struct {
210  daglink_t final; /* a link record stored by value (not a pointer); presumably describes the final link/node -- TODO confirm */
214  int32 nfrm; /* by name, a frame count -- TODO confirm */
215  int32 nlink; /* by name, a link count -- TODO confirm */
216  int32 nnode; /* by name, a node count -- TODO confirm */
217  int32 nbypass; /* by name, a bypass-link count -- TODO confirm */
219  int32 maxedge; /* edge limit; same name as the maxedge argument of dag_load() and the -maxedge option in the history log */
223  int32 lmop; /* presumably a running count of LM operations -- TODO confirm */
224  int32 maxlmop; /* presumably the cap on lmop (cf. the -maxlmop option in the history log) -- TODO confirm */
228  int32 fudged; /* presumably records whether fudge edges were added (cf. dag_add_fudge_edges) -- TODO confirm */
230  void *hook; /* untyped hook pointer; its use is not defined in this header */
232  cmd_ln_t *config; /* command-line/configuration handle */
233  listelem_alloc_t *node_alloc; /* list-element allocator; by name, used for dagnode_t records -- TODO confirm */
234  listelem_alloc_t *link_alloc; /* list-element allocator; by name, used for daglink_t records -- TODO confirm */
235  logmath_t *logmath; /* log-math handle */
236 } dag_t;
237 
238 
/*
 * Declaration only; the implementation lives outside this header.
 * Takes a srch_hyp_t list and returns nothing. The history log states that
 * hyp_free is shared by misc.c, dag and decode_anytopo.
 * NOTE(review): presumably releases every element of the list -- confirm
 * in the implementation.
 */
241 void hyp_free (srch_hyp_t *list);
242 
/*
 * Declaration only. Takes the DAG object plus a configuration handle and a
 * log-math handle; dag_t has members of the same names (config, logmath).
 * NOTE(review): presumably initializes *dagp and stores both handles --
 * confirm in the implementation.
 */
244 void dag_init(dag_t* dagp, cmd_ln_t *config, logmath_t *logmath);
245 
246 
/*
 * Declaration only. The history log describes dag_link as shared between
 * dag and decode_anytopo, and mentions a version that also copies the
 * language model score (the lscr argument here).
 * NOTE(review): presumably creates a link from node pd to node d carrying
 * ascr/lscr; the meaning of ef, byp and of the int32 return value is not
 * established in this header -- confirm in the implementation.
 */
251 int32 dag_link (dag_t * dagp,
252  dagnode_t *pd,
253  dagnode_t *d,
254  int32 ascr,
255  int32 lscr,
256  int32 ef,
257  daglink_t *byp
258  );
259 
260 
262  dagnode_t *dst,
263  int32 bypass
264  );
265 
267  dagnode_t *dst,
268  int32 bypass
269  );
270 
/*
 * Declaration only. The history log describes dag_update_link as shared by
 * both dag and decode_anytopo. Takes the same arguments as dag_link()
 * except that there is no lscr.
 * NOTE(review): presumably updates an existing pd -> d link (or creates
 * one); the return-value convention is not established here -- confirm in
 * the implementation.
 */
271 int32 dag_update_link (dag_t* dagp,
272  dagnode_t *pd,
273  dagnode_t *d,
274  int32 ascr,
275  int32 ef,
276  daglink_t *byp
277  );
278 
/*
 * Declaration only. The history log describes dag_param_read as shared by
 * both dag and astar. Takes an open FILE stream, a parameter name string
 * and a pointer to a line counter.
 * NOTE(review): presumably reads the named parameter from fp and advances
 * *lineno; the return-value convention is not established here -- confirm
 * in the implementation.
 */
282 int32 dag_param_read (FILE *fp,
283  char *param,
284  int32 *lineno
285  );
286 
287 
/*
 * Declaration only. The history log describes dag_bestpath as shared by dag
 * and decode_anytopo, notes that decode_anytopo's version multiplies the LM
 * score or filler penalty by the language weight, and that s3_dag always
 * uses lwf=1.
 * NOTE(review): presumably computes the best path involving link l and node
 * src; the exact roles of l, src and dict2lmwid and the return-value
 * convention are not established here -- confirm in the implementation.
 */
292 int32 dag_bestpath (
293  dag_t* dagp,
294  daglink_t *l,
295  dagnode_t *src,
296  float64 lwf,
297  dict_t *dict,
298  lm_t *lm,
299  s3lmwid32_t *dict2lmwid
300  );
301 
302 
/*
 * Declaration only. The history log describes dag_chk_linkscr as shared by
 * dag and decode_anytopo.
 * NOTE(review): presumably validates the scores stored on the DAG's links;
 * the return-value convention is not established here -- confirm in the
 * implementation.
 */
306 int32 dag_chk_linkscr (
307  dag_t *dagp
308  );
309 
/*
 * Declaration only. The history log describes dag_destroy as shared by
 * dag/astar/decode_anytopo.
 * NOTE(review): presumably releases the DAG and what it owns; whether dagp
 * itself is freed and what the int32 result means are not established
 * here -- confirm in the implementation.
 */
313 int32 dag_destroy (
314  dag_t *dagp
315  );
316 
/*
 * Declaration only. Takes the DAG, a dictionary, a language model and the
 * lwf factor; returns nothing.
 * NOTE(review): presumably fills in the hscr member of daglink_t records --
 * confirm in the implementation.
 */
323 void dag_compute_hscr(dag_t *dag, dict_t *dict, lm_t *lm, float64 lwf);
324 
331  daglink_t *l,
332  float64 lwf,
333  dict_t* dict,
334  fillpen_t* fpen
335  );
336 
/*
 * Declaration only. The history log says dag_write_header was added so the
 * DAG header writer could be shared between 3.x and 3.0. Takes the output
 * stream and a configuration handle; returns nothing.
 */
341 void dag_write_header(FILE *fp, cmd_ln_t *config);
342 
/*
 * Declaration only. Takes the DAG, a file name, a language model and a
 * dictionary.
 * NOTE(review): presumably writes the DAG to the named file; the output
 * format and the return-value convention are not established here --
 * confirm in the implementation.
 */
347 int32 dag_write(dag_t * dag,
348  const char *filename,
349  lm_t * lm,
350  dict_t * dict);
351 
/*
 * Declaration only. Same arguments as dag_write() plus an utterance id
 * string.
 * NOTE(review): by name, writes the DAG in an HTK lattice format; the
 * return-value convention is not established here -- confirm in the
 * implementation.
 */
356 int32 dag_write_htk(dag_t *dag,
357  const char *filename,
358  const char *uttid,
359  lm_t * lm,
360  dict_t * dict);
361 
362 
/*
 * Declaration only. The history log lists dag_search among the routines now
 * shared by dag and decode_anytopo. Returns a srch_hyp_t pointer.
 * NOTE(review): presumably searches the DAG for the best path ending at
 * node `final` and returns it as a hypothesis list (cf. hyp_free); who owns
 * the result and what is returned on failure are not established here --
 * confirm in the implementation.
 */
374 srch_hyp_t *dag_search (dag_t *dagp,
375  char *utt,
376  float64 lwf,
377  dagnode_t *final,
378  dict_t *dict,
379  lm_t *lm,
380  fillpen_t *fpen
381  );
382 
/*
 * Declaration only. The history log lists dag_add_fudge among the routines
 * now shared by dag and decode_anytopo.
 * NOTE(review): lathist is passed as an untyped pointer, so its real type
 * is not visible here. The listing also skips source line 391 inside this
 * declaration. The meaning of fudge and min_ef_range is not established in
 * this header -- confirm in the implementation.
 */
387 void dag_add_fudge_edges (dag_t* dagp,
388  int32 fudge,
389  int32 min_ef_range,
390  void *lathist,
392  dict_t *dict
393  );
394 
395 
/*
 * Declaration only. Takes the DAG, the lwf factor, a dictionary and a
 * filler-penalty object.
 * NOTE(review): by name, adds links that bypass filler-word nodes (cf. the
 * bypass member of daglink_t and nbypass in dag_t); the return-value
 * convention is not established here -- confirm in the implementation.
 */
404 int32 dag_bypass_filler_nodes (dag_t* dagp,
405  float64 lwf,
406  dict_t *dict,
407  fillpen_t *fpen
408  );
409 
/*
 * Declaration only. Takes the DAG and returns nothing.
 * NOTE(review): by name, removes the bypass links previously added to the
 * DAG (cf. dag_bypass_filler_nodes) -- confirm in the implementation.
 */
414 void dag_remove_bypass_links(dag_t *dag);
415 
/*
 * Declaration only. Takes the DAG and returns nothing.
 * NOTE(review): by name, removes nodes that are not reachable, presumably
 * using the `reachable` member of dagnode_t -- confirm in the
 * implementation.
 */
420 void dag_remove_unreachable(dag_t *dag);
421 
/*
 * Declaration only. Returns a dag_t pointer built from the named file; the
 * config and logmath arguments match the handles stored in dag_t.
 * NOTE(review): presumably reads a lattice file and allocates a new DAG;
 * ownership of the result (cf. dag_destroy), the value returned on failure,
 * and the exact meaning of maxedge, logbase and fudge are not established
 * in this header -- confirm in the implementation.
 */
436 dag_t * dag_load(char *file,
437  int32 maxedge,
438  float32 logbase,
439  int32 fudge,
440  dict_t * dict,
441  fillpen_t * fpen,
442  cmd_ln_t *config,
443  logmath_t *logmath
444  );
445 
446 #ifdef __cplusplus
447 }
448 #endif
449 
450 
451 #endif