48 #include <sphinxbase/ckd_alloc.h>
49 #include <sphinxbase/listelem_alloc.h>
50 #include <sphinxbase/strfuncs.h>
51 #include <sphinxbase/err.h>
52 #include <sphinxbase/pio.h>
66 int32 score, int32 ef)
71 for (fwdlink = from->
exits; fwdlink; fwdlink = fwdlink->next)
72 if (fwdlink->link->
to == to)
75 if (fwdlink == NULL) {
88 link->best_prev = NULL;
90 fwdlink->link = revlink->link = link;
91 fwdlink->next = from->
exits;
92 from->
exits = fwdlink;
99 fwdlink->link->
ascr = score;
100 fwdlink->link->
ef = ef;
112 for (node = dag->
nodes; node; node = node->
next) {
114 if (node == dag->
end || !dict_filler_word(dag->
dict, node->
basewid))
118 for (revlink = node->
entries; revlink; revlink = revlink->next) {
123 score += rlink->
ascr;
129 for (forlink = node->
exits; forlink; forlink = forlink->next) {
131 if (flink->
to && rlink->
from &&
134 score + flink->
ascr, flink->
ef);
147 for (x = node->
exits; x; x = next_x) {
149 x->link->
from = NULL;
152 for (x = node->
entries; x; x = next_x) {
167 for (x = node->
exits; x; x = next_x) {
169 if (x->link->
to == NULL) {
171 prev_x->next = next_x;
173 node->
exits = next_x;
181 for (x = node->
entries; x; x = next_x) {
183 if (x->link->
from == NULL) {
185 prev_x->next = next_x;
204 for (node = dag->
nodes; node; node = next_node) {
205 next_node = node->
next;
208 prev_node->
next = next_node;
210 dag->
nodes = next_node;
212 delete_node(dag, node);
220 for (node = dag->
nodes; node; node = node->
next) {
228 remove_dangling_links(dag, node);
239 initial = dag->
start;
242 E_INFO(
"Writing lattice file: %s\n", filename);
243 if ((fp = fopen(filename,
"w")) == NULL) {
244 E_ERROR_SYSTEM(
"Failed to open lattice file '%s' for writing", filename);
249 fprintf(fp,
"# getcwd: /this/is/bogus\n");
250 fprintf(fp,
"# -logbase %e\n", logmath_get_base(dag->
lmath));
253 fprintf(fp,
"Frames %d\n", dag->
n_frames);
256 for (i = 0, d = dag->
nodes; d; d = d->
next, i++);
258 "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n",
260 for (i = 0, d = dag->
nodes; d; d = d->
next, i++) {
262 fprintf(fp,
"%d %s %d %d %d ; %d\n",
263 i, dict_wordstr(dag->
dict, d->
wid),
268 fprintf(fp,
"Initial %d\nFinal %d\n", initial->
id, final->id);
272 fprintf(fp,
"BestSegAscr %d (NODEID ENDFRAME ASCORE)\n",
276 fprintf(fp,
"Edges (FROM-NODEID TO-NODEID ASCORE)\n");
279 for (l = d->
exits; l; l = l->next) {
282 fprintf(fp,
"%d %d %d\n",
286 fprintf(fp,
"End\n");
297 int32 j, n_links, n_nodes;
299 initial = dag->
start;
302 E_INFO(
"Writing lattice file: %s\n", filename);
303 if ((fp = fopen(filename,
"w")) == NULL) {
304 E_ERROR_SYSTEM(
"Failed to open lattice file '%s' for writing", filename);
308 for (n_links = n_nodes = 0, d = dag->
nodes; d; d = d->
next) {
313 for (l = d->
exits; l; l = l->next) {
323 fprintf(fp,
"# Lattice generated by PocketSphinx\n");
324 fprintf(fp,
"#\n# Header\n#\n");
325 fprintf(fp,
"VERSION=1.0\n");
326 fprintf(fp,
"start=%d\n", initial->
id);
327 fprintf(fp,
"end=%d\n", final->id);
330 fprintf(fp,
"N=%d\tL=%d\n", n_nodes, n_links);
331 fprintf(fp,
"#\n# Node definitions\n#\n");
333 char const *word = dict_wordstr(dag->
dict, d->
wid);
334 char const *c = strrchr(word,
'(');
339 altpron = atoi(c + 1);
340 word = dict_basestr(dag->
dict, d->
wid);
341 if (d->
wid == dict_startwid(dag->
dict))
342 word =
"!SENT_START";
343 else if (d->
wid == dict_finishwid(dag->
dict))
345 else if (dict_filler_word(dag->
dict, d->
wid))
347 fprintf(fp,
"I=%d\tt=%.2f\tW=%s\tv=%d\n",
351 fprintf(fp,
"#\n# Link definitions\n#\n");
352 for (j = 0, d = dag->
nodes; d; d = d->
next) {
356 for (l = d->
exits; l; l = l->next) {
361 fprintf(fp,
"J=%d\tS=%d\tE=%d\ta=%f\tp=%g\n", j++,
374 dag_param_read(lineiter_t *li,
char *param)
378 while ((li = lineiter_next(li)) != NULL) {
382 if (li->buf[0] ==
'#')
386 c = strchr(li->buf,
' ');
387 if (c == NULL)
continue;
390 if (strncmp(li->buf, param, strlen(param)) == 0
391 && sscanf(c + 1,
"%d", &n) == 1)
404 for (l = d->
entries; l; l = l->next)
406 dag_mark_reachable(l->link->
from);
422 int32 pip, silpen, fillpen;
424 dag = ckd_calloc(1,
sizeof(*dag));
433 dag->
dict = dict_init(NULL, NULL);
434 dag->
lmath = logmath_init(1.0001, 0, FALSE);
446 E_INFO(
"Reading DAG file: %s\n", file);
447 if ((fp = fopen_compchk(file, &ispipe)) == NULL) {
448 E_ERROR_SYSTEM(
"Failed to open DAG file '%s' for reading", file);
451 line = lineiter_start(fp);
455 E_ERROR(
"Premature EOF(%s)\n", file);
458 if (strncmp(line->buf,
"# getcwd: ", 10) != 0) {
459 E_ERROR(
"%s does not begin with '# getcwd: '\n%s", file, line->buf);
462 if ((line = lineiter_next(line)) == NULL) {
463 E_ERROR(
"Premature EOF(%s)\n", file);
466 if ((strncmp(line->buf,
"# -logbase ", 11) != 0)
467 || (sscanf(line->buf + 11,
"%lf", &lb) != 1)) {
468 E_WARN(
"%s: Cannot find -logbase in header\n", file);
472 if (dag->
lmath == NULL)
473 dag->
lmath = logmath_init(lb, 0, TRUE);
475 float32 pb = logmath_get_base(dag->
lmath);
476 if (fabs(lb - pb) >= 0.0001) {
477 E_WARN(
"Inconsistent logbases: %f vs %f: will compensate\n", lb, pb);
478 logratio = (float32)(log(lb) / log(pb));
479 E_INFO(
"Lattice log ratio: %f\n", logratio);
483 dag->
n_frames = dag_param_read(line,
"Frames");
485 E_ERROR(
"Frames parameter missing or invalid\n");
489 n_nodes = dag_param_read(line,
"Nodes");
491 E_ERROR(
"Nodes parameter missing or invalid\n");
496 darray = ckd_calloc(n_nodes,
sizeof(*darray));
497 for (i = 0; i < n_nodes; i++) {
500 int seqid, sf, fef, lef;
503 if ((line = lineiter_next(line)) == NULL) {
504 E_ERROR(
"Premature EOF while loading Nodes(%s)\n", file);
509 sscanf(line->buf,
"%d %255s %d %d %d", &seqid, wd, &sf, &fef,
511 E_ERROR(
"Cannot parse line: %s, value of count %d\n", line->buf, k);
515 w = dict_wordid(dag->
dict, wd);
517 if (dag->
search == NULL) {
518 char *ww = ckd_salloc(wd);
519 if (dict_word2basestr(ww) != -1) {
521 dict_add_word(dag->
dict, ww, NULL, 0);
524 w = dict_add_word(dag->
dict, wd, NULL, 0);
527 E_ERROR(
"Unknown word in line: %s\n", line->buf);
533 E_ERROR(
"Seqno error: %s\n", line->buf);
557 k = dag_param_read(line,
"Initial");
558 if ((k < 0) || (k >= n_nodes)) {
559 E_ERROR(
"Initial node parameter missing or invalid\n");
562 dag->
start = darray[k];
565 k = dag_param_read(line,
"Final");
566 if ((k < 0) || (k >= n_nodes)) {
567 E_ERROR(
"Final node parameter missing or invalid\n");
570 dag->
end = darray[k];
573 if ((k = dag_param_read(line,
"BestSegAscr")) < 0) {
574 E_ERROR(
"BestSegAscr parameter missing\n");
577 for (i = 0; i < k; i++) {
578 if ((line = lineiter_next(line)) == NULL) {
579 E_ERROR(
"Premature EOF while (%s) ignoring BestSegAscr\n",
586 while ((line = lineiter_next(line)) != NULL) {
587 if (line->buf[0] ==
'#')
589 if (0 == strncmp(line->buf,
"Edges", 5))
593 E_ERROR(
"Edges missing\n");
596 while ((line = lineiter_next(line)) != NULL) {
600 if (sscanf(line->buf,
"%d %d %d", &from, &to, &ascr) != 3)
606 if (logratio != 1.0f)
607 ascr = (int32)(ascr * logratio);
610 if (strcmp(line->buf,
"End\n") != 0) {
611 E_ERROR(
"Terminating 'End' missing\n");
615 fclose_comp(fp, ispipe);
621 if (dict_filler_word(dag->
dict, dag->
end->
wid))
623 ? ps_search_finish_wid(dag->
search)
624 : dict_wordid(dag->
dict, S3_FINISH_WORD);
627 dag_mark_reachable(dag->
end);
637 pip = logmath_log(dag->
lmath, cmd_ln_float32_r(ps->
config,
"-pip"));
638 silpen = pip + logmath_log(dag->
lmath,
639 cmd_ln_float32_r(ps->
config,
"-silprob"));
640 fillpen = pip + logmath_log(dag->
lmath,
641 cmd_ln_float32_r(ps->
config,
"-fillprob"));
648 E_ERROR(
"Failed to load %s\n", file);
650 if (fp) fclose_comp(fp, ispipe);
666 dag = ckd_calloc(1,
sizeof(*dag));
668 dag->
dict = dict_retain(search->
dict);
694 logmath_free(dag->
lmath);
695 dict_free(dag->
dict);
737 if (out_fef) *out_fef = (int16)node->
fef;
738 if (out_lef) *out_lef = (int16)node->
lef;
745 return dict_wordstr(dag->
dict, node->
wid);
759 int32 bestpost = logmath_get_zero(dag->
lmath);
761 for (links = node->
exits; links; links = links->next) {
762 int32 post = links->link->
alpha + links->link->
beta - dag->
norm;
763 if (post > bestpost) {
764 if (out_link) *out_link = links->link;
818 if (out_src) *out_src = link->
from;
825 if (link->
from == NULL)
833 if (link->
from == NULL)
841 return link->best_prev;
865 len += strlen(wstr) + 1;
867 for (l = link; l; l = l->best_prev) {
871 len += strlen(wstr) + 1;
877 dag->
hyp_str = ckd_calloc(1, len+1);
884 memcpy(c, wstr, len);
891 for (l = link; l; l = l->best_prev) {
897 memcpy(c, wstr, len);
912 ngram_model_t *lmset;
916 if (0 != strcmp(ps_search_name(seg->
search),
"ngram")) {
924 if (link->best_prev == NULL) {
943 if (link->best_prev->best_prev)
970 logmath_t *lmath = ps_search_acmod(seg->
search)->lmath;
977 for (n = node; n; n = n->
alt) {
978 for (x = n->
exits; x; x = x->next) {
981 seg->
prob = logmath_add(lmath, seg->
prob,
986 seg->
word = dict_wordstr(ps_search_dict(seg->
search), node->
wid);
990 ps_lattice_compute_lscr(seg, link, to);
998 ckd_free(itor->
links);
1009 ps_lattice_seg_free(seg);
1014 ps_lattice_link2itor(seg, itor->
links[itor->
cur - 1], TRUE);
1017 ps_lattice_link2itor(seg, itor->
links[itor->
cur], FALSE);
1024 ps_lattice_seg_next,
1038 itor = ckd_calloc(1,
sizeof(*itor));
1039 itor->
base.
vt = &ps_lattice_segfuncs;
1045 for (l = link; l; l = l->best_prev) {
1055 for (l = link; l; l = l->best_prev) {
1056 itor->
links[cur] = l;
1060 ps_lattice_link2itor((
ps_seg_t *)itor, itor->
links[0], FALSE);
1096 link = dag->
q_head->link;
1123 for (node = dag->
nodes; node; node = node->
next)
1124 node->info.
fanin = 0;
1125 for (node = dag->
nodes; node; node = node->
next) {
1126 for (x = node->
exits; x; x = x->next)
1131 if (start == NULL) start = dag->
start;
1132 for (x = start->
exits; x; x = x->next)
1151 if (next->
to->info.
fanin == 0) {
1154 if (end == NULL) end = dag->
end;
1155 if (next->
to == end) {
1164 for (x = next->
to->
exits; x; x = x->next)
1180 for (node = dag->
nodes; node; node = node->
next) {
1181 node->info.
fanin = 0;
1182 for (x = node->
exits; x; x = x->next)
1187 if (end == NULL) end = dag->
end;
1188 for (x = end->
entries; x; x = x->next)
1210 if (start == NULL) start = dag->
start;
1211 if (next->
from == start) {
1239 float32 lwf, float32 ascale)
1255 for (node = dag->
nodes; node; node = node->
next) {
1256 for (x = node->
exits; x; x = x->next) {
1258 x->link->
alpha = logmath_get_zero(lmath);
1261 for (x = dag->
start->
exits; x; x = x->next) {
1265 if (dict_filler_word(ps_search_dict(search), x->link->
to->
basewid)
1266 && x->link->
to != dag->
end)
1272 (ngram_bg_score(lmset, x->link->
to->
basewid,
1273 ps_search_start_wid(search), &n_used)
1278 x->link->best_prev = NULL;
1286 int32 bprob, n_used;
1291 if (dict_filler_word(ps_search_dict(search), link->
to->
basewid) && link->
to != dag->
end)
1296 assert(link->
path_scr != MAX_NEG_INT32);
1300 bprob = ngram_ng_prob(lmset,
1310 for (x = link->
to->
exits; x; x = x->next) {
1311 int32 tscore, score;
1314 if (dict_filler_word(ps_search_dict(search), x->link->
to->
basewid)
1315 && x->link->
to != dag->
end)
1319 x->link->
alpha = logmath_add(lmath, x->link->
alpha, link->
alpha + bprob);
1322 tscore = (ngram_tg_score(lmset, x->link->
to->
basewid,
1332 x->link->best_prev = link;
1340 bestescr = MAX_NEG_INT32;
1344 dag->
norm = logmath_get_zero(lmath);
1345 for (x = dag->
end->
entries; x; x = x->next) {
1346 int32 bprob, n_used;
1348 if (dict_filler_word(ps_search_dict(search), x->link->
from->
basewid))
1351 bprob = ngram_ng_prob(lmset,
1356 dag->
norm = logmath_add(lmath, dag->
norm, x->link->
alpha + bprob);
1365 E_INFO(
"Normalizer P(O) = alpha(%s:%d:%d) = %d\n",
1375 ngram_model_t *lmset;
1379 if (dag->
search && 0 == strcmp(ps_search_name(dag->
search),
"ngram"))
1393 jprob += ngram_ng_prob(lmset, link->
to->
basewid,
1400 link = link->best_prev;
1403 E_INFO(
"Joint P(O,S) = %d P(S|O) = %d\n", jprob, jprob - dag->
norm);
1423 for (node = dag->
nodes; node; node = node->
next) {
1424 for (x = node->
exits; x; x = x->next) {
1425 x->link->
beta = logmath_get_zero(lmath);
1430 bestescr = MAX_NEG_INT32;
1434 int32 bprob, n_used;
1439 if (dict_filler_word(ps_search_dict(search), link->
to->
basewid) && link->
to != dag->
end)
1444 bprob = ngram_ng_prob(lmset, link->
to->
basewid,
1449 if (link->
to == dag->
end) {
1462 for (x = link->
to->
exits; x; x = x->next) {
1463 if (dict_filler_word(ps_search_dict(search), x->link->
to->
basewid) && x->link->
to != dag->
end)
1465 link->
beta = logmath_add(lmath, link->
beta,
1466 x->link->
beta + bprob
1473 return ps_lattice_joint(dag, bestend, ascale) - dag->
norm;
1488 for (x = link->
from->
exits; x; x = next) {
1490 if (x->link == link) {
1500 for (x = link->
to->
entries; x; x = next) {
1502 if (x->link == link) {
1515 dag_mark_reachable(dag->
end);
1522 #define MAX_PATHS 500
1523 #define MAX_HYP_TRIES 10000
1535 int32 bestscore, score;
1542 for (x = from->
exits; x; x = x->next) {
1545 score = best_rem_score(nbest, x->link->
to);
1546 score += x->link->
ascr;
1548 score += (ngram_bg_score(nbest->lmset, x->link->
to->
basewid,
1571 for (i = 0, p = nbest->path_list; (i < MAX_PATHS) && p; p = p->
next, i++) {
1578 if (i < MAX_PATHS) {
1582 nbest->path_list = newpath;
1584 prev->
next = newpath;
1586 nbest->path_tail = newpath;
1589 nbest->n_hyp_insert++;
1590 nbest->insert_depth += i;
1594 nbest->path_tail = prev;
1596 nbest->n_path = MAX_PATHS;
1599 nbest->n_hyp_reject++;
1600 for (; p; p = newpath) {
1603 nbest->n_hyp_reject++;
1614 int32 total_score, tail_score;
1617 for (x = path->
node->
exits; x; x = x->next) {
1626 newpath->
node = x->link->
to;
1631 newpath->
score += nbest->lwf
1632 * (ngram_tg_score(nbest->lmset, newpath->
node->
basewid,
1638 newpath->
score += nbest->lwf
1639 * (ngram_bg_score(nbest->lmset, newpath->
node->
basewid,
1645 nbest->n_hyp_tried++;
1649 if (nbest->n_path >= MAX_PATHS) {
1651 nbest->path_tail->
score
1653 if (total_score < tail_score) {
1655 nbest->n_hyp_reject++;
1660 path_insert(nbest, newpath, total_score);
1666 ngram_model_t *lmset,
1674 nbest = ckd_calloc(1,
sizeof(*nbest));
1676 nbest->lmset = lmset;
1688 for (node = dag->
nodes; node; node = node->
next) {
1689 if (node == dag->
end)
1691 else if (node->
exits == NULL)
1698 nbest->path_list = nbest->path_tail = NULL;
1699 for (node = dag->
nodes; node; node = node->
next) {
1700 if (node->
sf == sf) {
1704 best_rem_score(nbest, node);
1709 path->
score = nbest->lwf *
1711 ? ngram_bg_score(nbest->lmset, node->
basewid, w2, &n_used)
1712 : ngram_tg_score(nbest->lmset, node->
basewid, w2, w1, &n_used));
1731 while ((nbest->top = nbest->path_list) != NULL) {
1732 nbest->path_list = nbest->path_list->
next;
1733 if (nbest->top == nbest->path_tail)
1734 nbest->path_tail = NULL;
1738 if ((nbest->top->
node->
sf >= nbest->ef)
1739 || ((nbest->top->
node == dag->
end) &&
1740 (nbest->ef > dag->
end->
sf))) {
1747 if (nbest->top->
node->
fef < nbest->ef)
1748 path_extend(nbest, nbest->top);
1765 search = nbest->dag->
search;
1769 for (p = path; p; p = p->
parent) {
1770 if (dict_real_word(ps_search_dict(search), p->
node->
basewid)) {
1771 char *wstr = dict_wordstr(ps_search_dict(search), p->
node->
basewid);
1773 len += strlen(wstr) + 1;
1782 hyp = ckd_calloc(1, len);
1784 for (p = path; p; p = p->
parent) {
1785 if (dict_real_word(ps_search_dict(search), p->
node->
basewid)) {
1786 char *wstr = dict_wordstr(ps_search_dict(search), p->
node->
basewid);
1790 memcpy(c, wstr, len);
1799 nbest->
hyps = glist_add_ptr(nbest->
hyps, hyp);
1809 assert(itor->cur < itor->n_nodes);
1810 node = itor->nodes[itor->cur];
1811 if (itor->cur == itor->n_nodes - 1)
1812 seg->
ef = node->
lef;
1814 seg->
ef = itor->nodes[itor->cur + 1]->
sf - 1;
1815 seg->
word = dict_wordstr(ps_search_dict(seg->
search), node->
wid);
1824 ckd_free(itor->nodes);
1834 if (itor->cur == itor->n_nodes) {
1835 ps_astar_seg_free(seg);
1839 ps_astar_node2itor(itor);
1858 itor = ckd_calloc(1,
sizeof(*itor));
1859 itor->base.
vt = &ps_astar_segfuncs;
1861 itor->base.
lwf = lwf;
1862 itor->n_nodes = itor->cur = 0;
1863 for (p = path; p; p = p->
parent) {
1866 itor->nodes = ckd_calloc(itor->n_nodes,
sizeof(*itor->nodes));
1867 cur = itor->n_nodes - 1;
1868 for (p = path; p; p = p->
parent) {
1869 itor->nodes[cur] = p->
node;
1873 ps_astar_node2itor(itor);
1883 for (gn = nbest->
hyps; gn; gn = gnode_next(gn)) {
1884 ckd_free(gnode_ptr(gn));
1886 glist_free(nbest->
hyps);