00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <string.h>
00039
00040 #include "dict2pid.h"
00041 #include "hmm.h"
00042
00043
00048 void
00049 compress_table(s3ssid_t * uncomp_tab, s3ssid_t * com_tab,
00050 s3cipid_t * ci_map, int32 n_ci)
00051 {
00052 int32 found;
00053 int32 r;
00054 int32 tmp_r;
00055
00056 for (r = 0; r < n_ci; r++) {
00057 com_tab[r] = BAD_S3SSID;
00058 ci_map[r] = BAD_S3CIPID;
00059 }
00061 for (r = 0; r < n_ci; r++) {
00062
00063 found = 0;
00064 for (tmp_r = 0; tmp_r < r && com_tab[tmp_r] != BAD_S3SSID; tmp_r++) {
00065 if (uncomp_tab[r] == com_tab[tmp_r]) {
00066 found = 1;
00067 ci_map[r] = tmp_r;
00068 break;
00069 }
00070 }
00071
00072 if (found == 0) {
00073 com_tab[tmp_r] = uncomp_tab[r];
00074 ci_map[r] = tmp_r;
00075 }
00076 }
00077 }
00078
00079
00080 static void
00081 compress_right_context_tree(dict2pid_t * d2p,
00082 s3ssid_t ***rdiph_rc)
00083 {
00084 int32 n_ci;
00085 int32 b, l, r;
00086 s3ssid_t *rmap;
00087 s3ssid_t *tmpssid;
00088 s3cipid_t *tmpcimap;
00089 bin_mdef_t *mdef = d2p->mdef;
00090 size_t alloc;
00091
00092 n_ci = mdef->n_ciphone;
00093
00094 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
00095 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
00096
00097 d2p->rssid =
00098 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
00099 alloc = mdef->n_ciphone * sizeof(xwdssid_t *);
00100
00101 for (b = 0; b < n_ci; b++) {
00102 d2p->rssid[b] =
00103 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
00104 alloc += mdef->n_ciphone * sizeof(xwdssid_t);
00105
00106 for (l = 0; l < n_ci; l++) {
00107 rmap = rdiph_rc[b][l];
00108 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
00109
00110 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
00111 r++);
00112
00113 if (tmpssid[0] != BAD_S3SSID) {
00114 d2p->rssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
00115 memcpy(d2p->rssid[b][l].ssid, tmpssid,
00116 r * sizeof(s3ssid_t));
00117 d2p->rssid[b][l].cimap =
00118 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
00119 memcpy(d2p->rssid[b][l].cimap, tmpcimap,
00120 (mdef->n_ciphone) * sizeof(s3cipid_t));
00121 d2p->rssid[b][l].n_ssid = r;
00122 }
00123 else {
00124 d2p->rssid[b][l].ssid = NULL;
00125 d2p->rssid[b][l].cimap = NULL;
00126 d2p->rssid[b][l].n_ssid = 0;
00127 }
00128 }
00129 }
00130
00131 E_INFO("Allocated %d bytes (%d KiB) for word-final triphones\n",
00132 (int)alloc, (int)alloc / 1024);
00133 ckd_free(tmpssid);
00134 ckd_free(tmpcimap);
00135 }
00136
00137 static void
00138 compress_left_right_context_tree(dict2pid_t * d2p)
00139 {
00140 int32 n_ci;
00141 int32 b, l, r;
00142 s3ssid_t *rmap;
00143 s3ssid_t *tmpssid;
00144 s3cipid_t *tmpcimap;
00145 bin_mdef_t *mdef = d2p->mdef;
00146 size_t alloc;
00147
00148 n_ci = mdef->n_ciphone;
00149
00150 tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t));
00151 tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t));
00152
00153 assert(d2p->lrdiph_rc);
00154
00155 d2p->lrssid =
00156 (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *));
00157 alloc = mdef->n_ciphone * sizeof(xwdssid_t *);
00158
00159 for (b = 0; b < n_ci; b++) {
00160
00161 d2p->lrssid[b] =
00162 (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t));
00163 alloc += mdef->n_ciphone * sizeof(xwdssid_t);
00164
00165 for (l = 0; l < n_ci; l++) {
00166 rmap = d2p->lrdiph_rc[b][l];
00167
00168 compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone);
00169
00170 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID;
00171 r++);
00172
00173 if (tmpssid[0] != BAD_S3SSID) {
00174 d2p->lrssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t));
00175 memcpy(d2p->lrssid[b][l].ssid, tmpssid,
00176 r * sizeof(s3ssid_t));
00177 d2p->lrssid[b][l].cimap =
00178 ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t));
00179 memcpy(d2p->lrssid[b][l].cimap, tmpcimap,
00180 (mdef->n_ciphone) * sizeof(s3cipid_t));
00181 d2p->lrssid[b][l].n_ssid = r;
00182 }
00183 else {
00184 d2p->lrssid[b][l].ssid = NULL;
00185 d2p->lrssid[b][l].cimap = NULL;
00186 d2p->lrssid[b][l].n_ssid = 0;
00187 }
00188 }
00189 }
00190
00191
00192 ckd_free(tmpssid);
00193 ckd_free(tmpcimap);
00194
00195 E_INFO("Allocated %d bytes (%d KiB) for single-phone word triphones\n",
00196 (int)alloc, (int)alloc / 1024);
00197 }
00198
00203 int32
00204 get_rc_nssid(dict2pid_t * d2p, s3wid_t w)
00205 {
00206 int32 pronlen;
00207 s3cipid_t b, lc;
00208 dict_t *dict = d2p->dict;
00209
00210 pronlen = dict->word[w].pronlen;
00211 b = dict->word[w].ciphone[pronlen - 1];
00212
00213 if (pronlen == 1) {
00214
00215
00216
00217
00218 return (d2p->lrssid[b][0].n_ssid);
00219 }
00220 else {
00221
00222 lc = dict->word[w].ciphone[pronlen - 2];
00223 return (d2p->rssid[b][lc].n_ssid);
00224 }
00225
00226 }
00227
00228 s3cipid_t *
00229 dict2pid_get_rcmap(dict2pid_t * d2p, s3wid_t w)
00230 {
00231 int32 pronlen;
00232 s3cipid_t b, lc;
00233 dict_t *dict = d2p->dict;
00234
00235 pronlen = dict->word[w].pronlen;
00236 b = dict->word[w].ciphone[pronlen - 1];
00237
00238 if (pronlen == 1) {
00239
00240
00241
00242
00243 return (d2p->lrssid[b][0].cimap);
00244 }
00245 else {
00246
00247 lc = dict->word[w].ciphone[pronlen - 2];
00248 return (d2p->rssid[b][lc].cimap);
00249 }
00250 }
00251
00252 static void
00253 free_compress_map(xwdssid_t ** tree, int32 n_ci)
00254 {
00255 int32 b, l;
00256 for (b = 0; b < n_ci; b++) {
00257 for (l = 0; l < n_ci; l++) {
00258 ckd_free(tree[b][l].ssid);
00259 ckd_free(tree[b][l].cimap);
00260 }
00261 ckd_free(tree[b]);
00262 }
00263 ckd_free(tree);
00264 }
00265
00266 static void
00267 populate_lrdiph(dict2pid_t *d2p, s3ssid_t ***rdiph_rc, s3cipid_t b)
00268 {
00269 bin_mdef_t *mdef = d2p->mdef;
00270 s3cipid_t l, r;
00271
00272 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00273 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00274 s3pid_t p;
00275 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00276 (s3cipid_t) l,
00277 (s3cipid_t) r,
00278 WORD_POSN_SINGLE);
00279 d2p->lrdiph_rc[b][l][r]
00280 = bin_mdef_pid2ssid(mdef, p);
00281 if (r == bin_mdef_silphone(mdef))
00282 d2p->ldiph_lc[b][r][l]
00283 = bin_mdef_pid2ssid(mdef, p);
00284 if (rdiph_rc && l == bin_mdef_silphone(mdef))
00285 rdiph_rc[b][l][r]
00286 = bin_mdef_pid2ssid(mdef, p);
00287 assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p)));
00288 E_DEBUG(2,("%s(%s,%s) => %d / %d\n",
00289 bin_mdef_ciphone_str(mdef, b),
00290 bin_mdef_ciphone_str(mdef, l),
00291 bin_mdef_ciphone_str(mdef, r),
00292 p, bin_mdef_pid2ssid(mdef, p)));
00293 }
00294 }
00295 }
00296
00297 int
00298 dict2pid_add_word(dict2pid_t *d2p,
00299 int32 wid)
00300 {
00301 bin_mdef_t *mdef = d2p->mdef;
00302 dict_t *d = d2p->dict;
00303
00304 if (dict_pronlen(d, wid) > 1) {
00305 s3cipid_t l;
00306
00307
00308 if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0]
00309 == BAD_S3SSID) {
00310 E_INFO("Filling in left-context diphones for %s(?,%s)\n",
00311 bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)),
00312 bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid)));
00313 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00314 s3ssid_t p
00315 = bin_mdef_phone_id_nearest(mdef,
00316 dict_first_phone(d, wid), l,
00317 dict_second_phone(d, wid),
00318 WORD_POSN_BEGIN);
00319 d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l]
00320 = bin_mdef_pid2ssid(mdef, p);
00321 }
00322 }
00323 if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid
00324 == 0) {
00325 s3ssid_t *rmap;
00326 s3ssid_t *tmpssid;
00327 s3cipid_t *tmpcimap;
00328 s3cipid_t r;
00329
00330 E_INFO("Filling in right-context diphones for %s(%s,?)\n",
00331 bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)),
00332 bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid)));
00333 rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap));
00334 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00335 s3ssid_t p
00336 = bin_mdef_phone_id_nearest(mdef,
00337 dict_last_phone(d, wid),
00338 dict_second_last_phone(d, wid), r,
00339 WORD_POSN_END);
00340 rmap[r] = bin_mdef_pid2ssid(mdef, p);
00341 }
00342 tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid));
00343 tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap));
00344 compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef));
00345 for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++)
00346 ;
00347 d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid;
00348 d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap;
00349 d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r;
00350 ckd_free(rmap);
00351 }
00352 }
00353 else {
00354
00355
00356 E_INFO("Filling in context triphones for %s(?,?)\n",
00357 bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)));
00358 if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) {
00359 populate_lrdiph(d2p, NULL, dict_first_phone(d, wid));
00360 }
00361 }
00362
00363 return 0;
00364 }
00365
00366 s3ssid_t
00367 dict2pid_internal(dict2pid_t *d2p,
00368 int32 wid,
00369 int pos)
00370 {
00371 int b, l, r, p;
00372 dict_t *dict = d2p->dict;
00373 bin_mdef_t *mdef = d2p->mdef;
00374
00375 if (pos == 0 || pos == dict_pronlen(dict, wid))
00376 return BAD_S3SSID;
00377
00378 b = dict_pron(dict, wid, pos);
00379 l = dict_pron(dict, wid, pos - 1);
00380 r = dict_pron(dict, wid, pos + 1);
00381 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00382 (s3cipid_t) l, (s3cipid_t) r,
00383 WORD_POSN_INTERNAL);
00384 return bin_mdef_pid2ssid(mdef, p);
00385 }
00386
00387 dict2pid_t *
00388 dict2pid_build(bin_mdef_t * mdef, dict_t * dict)
00389 {
00390 dict2pid_t *dict2pid;
00391 s3ssid_t ***rdiph_rc;
00392 bitvec_t *ldiph, *rdiph, *single;
00393 int32 pronlen;
00394 int32 b, l, r, w, p;
00395
00396 E_INFO("Building PID tables for dictionary\n");
00397 assert(mdef);
00398 assert(dict);
00399
00400 dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
00401 dict2pid->refcount = 1;
00402 dict2pid->mdef = bin_mdef_retain(mdef);
00403 dict2pid->dict = dict_retain(dict);
00404 dict2pid->n_dictsize = dict_size(dict);
00405 E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n",
00406 mdef->n_ciphone, sizeof(s3ssid_t),
00407 mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024);
00408 dict2pid->ldiph_lc =
00409 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
00410 mdef->n_ciphone, sizeof(s3ssid_t));
00411
00412 rdiph_rc =
00413 (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
00414 mdef->n_ciphone, sizeof(s3ssid_t));
00415
00416 dict2pid->n_ci = mdef->n_ciphone;
00417 dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone,
00418 mdef->n_ciphone,
00419 mdef->n_ciphone,
00420 sizeof
00421 (s3ssid_t));
00422
00423
00424 for (b = 0; b < mdef->n_ciphone; ++b) {
00425 for (r = 0; r < mdef->n_ciphone; ++r) {
00426 for (l = 0; l < mdef->n_ciphone; ++l) {
00427 dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
00428 dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID;
00429 rdiph_rc[b][l][r] = BAD_S3SSID;
00430 }
00431 }
00432 }
00433
00434
00435 ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
00436 rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
00437 single = bitvec_alloc(mdef->n_ciphone);
00438
00439 for (w = 0; w < dict2pid->n_dictsize; w++) {
00440 pronlen = dict_pronlen(dict, w);
00441
00442 if (pronlen >= 2) {
00443 b = dict_first_phone(dict, w);
00444 r = dict_second_phone(dict, w);
00445
00446 if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) {
00447
00448 bitvec_set(ldiph, b * mdef->n_ciphone + r);
00449
00450
00451 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00452 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00453 (s3cipid_t) l, (s3cipid_t) r,
00454 WORD_POSN_BEGIN);
00455 dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
00456 }
00457 }
00458
00459
00460
00461 l = dict_second_last_phone(dict, w);
00462 b = dict_last_phone(dict, w);
00463 if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) {
00464
00465 bitvec_set(rdiph, b * mdef->n_ciphone + l);
00466
00467 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00468 p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
00469 (s3cipid_t) l, (s3cipid_t) r,
00470 WORD_POSN_END);
00471 rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
00472 }
00473 }
00474 }
00475 else if (pronlen == 1) {
00476 b = dict_pron(dict, w, 0);
00477 E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
00478 dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
00479
00480 if (bitvec_is_clear(single, b)) {
00481 populate_lrdiph(dict2pid, rdiph_rc, b);
00482 bitvec_set(single, b);
00483 }
00484 }
00485 }
00486
00487 bitvec_free(ldiph);
00488 bitvec_free(rdiph);
00489 bitvec_free(single);
00490
00491
00492 compress_right_context_tree(dict2pid, rdiph_rc);
00493 compress_left_right_context_tree(dict2pid);
00494
00495 ckd_free_3d(rdiph_rc);
00496
00497 dict2pid_report(dict2pid);
00498 return dict2pid;
00499 }
00500
00501 dict2pid_t *
00502 dict2pid_retain(dict2pid_t *d2p)
00503 {
00504 ++d2p->refcount;
00505 return d2p;
00506 }
00507
00508 int
00509 dict2pid_free(dict2pid_t * d2p)
00510 {
00511 if (d2p == NULL)
00512 return 0;
00513 if (--d2p->refcount > 0)
00514 return d2p->refcount;
00515
00516 if (d2p->ldiph_lc)
00517 ckd_free_3d((void ***) d2p->ldiph_lc);
00518
00519 if (d2p->lrdiph_rc)
00520 ckd_free_3d((void ***) d2p->lrdiph_rc);
00521
00522 if (d2p->rssid)
00523 free_compress_map(d2p->rssid, d2p->n_ci);
00524
00525 if (d2p->lrssid)
00526 free_compress_map(d2p->lrssid, d2p->n_ci);
00527
00528 bin_mdef_free(d2p->mdef);
00529 dict_free(d2p->dict);
00530 ckd_free(d2p);
00531 return 0;
00532 }
00533
00534 void
00535 dict2pid_report(dict2pid_t * d2p)
00536 {
00537 }
00538
00539 void
00540 dict2pid_dump(FILE * fp, dict2pid_t * d2p)
00541 {
00542 int32 w, p, pronlen;
00543 int32 i, j, b, l, r;
00544 bin_mdef_t *mdef = d2p->mdef;
00545 dict_t *dict = d2p->dict;
00546
00547 fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... ssid comssid)\n");
00548 for (w = 0; w < d2p->n_dictsize; w++) {
00549 fprintf(fp, "%30s ", dict_wordstr(dict, w));
00550
00551 pronlen = dict_pronlen(dict, w);
00552 for (p = 0; p < pronlen; p++)
00553 fprintf(fp, " %5d", dict2pid_internal(d2p, w, p));
00554 fprintf(fp, "\n");
00555 }
00556 fprintf(fp, "#\n");
00557
00558 fprintf(fp, "# LDIPH_LC (b r l ssid)\n");
00559 for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) {
00560 for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
00561 for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
00562 if (IS_S3SSID(d2p->ldiph_lc[b][r][l]))
00563 fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]);
00564 }
00565 }
00566 }
00567 fprintf(fp, "#\n");
00568
00569 fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq);
00570 for (i = 0; i < mdef->n_sseq; i++) {
00571 fprintf(fp, "%5d ", i);
00572 for (j = 0; j < bin_mdef_n_emit_state(mdef); j++)
00573 fprintf(fp, " %5d", mdef->sseq[i][j]);
00574 fprintf(fp, "\n");
00575 }
00576 fprintf(fp, "#\n");
00577 fprintf(fp, "# END\n");
00578
00579 fflush(fp);
00580 }