00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050 #include <stdio.h>
00051 #include <string.h>
00052 #include <assert.h>
00053
00054
00055 #include <prim_type.h>
00056 #include <ckd_alloc.h>
00057 #include <byteorder.h>
00058 #include <case.h>
00059 #include <err.h>
00060
00061
00062 #include "mdef.h"
00063 #include "bin_mdef.h"
00064
00065 bin_mdef_t *
00066 bin_mdef_read_text(cmd_ln_t *config, const char *filename)
00067 {
00068 bin_mdef_t *bmdef;
00069 mdef_t *mdef;
00070 int i, nodes, ci_idx, lc_idx, rc_idx;
00071 int nchars;
00072
00073 if ((mdef = mdef_init((char *) filename, TRUE)) == NULL)
00074 return NULL;
00075
00076
00077 if (mdef->n_sen > BAD_SENID) {
00078 E_ERROR("Number of senones exceeds limit: %d > %d\n",
00079 mdef->n_sen, BAD_SENID);
00080 mdef_free(mdef);
00081 return NULL;
00082 }
00083 if (mdef->n_sseq > BAD_SSID) {
00084 E_ERROR("Number of senone sequences exceeds limit: %d > %d\n",
00085 mdef->n_sseq, BAD_SSID);
00086 mdef_free(mdef);
00087 return NULL;
00088 }
00089
00090
00091 bmdef = ckd_calloc(1, sizeof(*bmdef));
00092 bmdef->refcnt = 1;
00093
00094
00095 bmdef->n_ciphone = mdef->n_ciphone;
00096 bmdef->n_phone = mdef->n_phone;
00097 bmdef->n_emit_state = mdef->n_emit_state;
00098 bmdef->n_ci_sen = mdef->n_ci_sen;
00099 bmdef->n_sen = mdef->n_sen;
00100 bmdef->n_tmat = mdef->n_tmat;
00101 bmdef->n_sseq = mdef->n_sseq;
00102 bmdef->sseq = mdef->sseq;
00103 bmdef->cd2cisen = mdef->cd2cisen;
00104 bmdef->sen2cimap = mdef->sen2cimap;
00105 bmdef->n_ctx = 3;
00106 bmdef->sil = mdef->sil;
00107 mdef->sseq = NULL;
00108 mdef->cd2cisen = NULL;
00109 mdef->sen2cimap = NULL;
00110
00111
00112
00113
00114 bmdef->ciname = ckd_calloc(bmdef->n_ciphone, sizeof(*bmdef->ciname));
00115 nchars = 0;
00116 for (i = 0; i < bmdef->n_ciphone; ++i)
00117 nchars += strlen(mdef->ciphone[i].name) + 1;
00118 bmdef->ciname[0] = ckd_calloc(nchars, 1);
00119 strcpy(bmdef->ciname[0], mdef->ciphone[0].name);
00120 for (i = 1; i < bmdef->n_ciphone; ++i) {
00121 bmdef->ciname[i] =
00122 bmdef->ciname[i - 1] + strlen(bmdef->ciname[i - 1]) + 1;
00123 strcpy(bmdef->ciname[i], mdef->ciphone[i].name);
00124 if (i > 0 && strcmp(bmdef->ciname[i - 1], bmdef->ciname[i]) > 0) {
00125
00126 E_ERROR("Phone names are not in sorted order, sorry.");
00127 bin_mdef_free(bmdef);
00128 return NULL;
00129 }
00130 }
00131
00132
00133 bmdef->phone = ckd_calloc(bmdef->n_phone, sizeof(*bmdef->phone));
00134 for (i = 0; i < mdef->n_phone; ++i) {
00135 bmdef->phone[i].ssid = mdef->phone[i].ssid;
00136 bmdef->phone[i].tmat = mdef->phone[i].tmat;
00137 if (i < bmdef->n_ciphone) {
00138 bmdef->phone[i].info.ci.filler = mdef->ciphone[i].filler;
00139 }
00140 else {
00141 bmdef->phone[i].info.cd.wpos = mdef->phone[i].wpos;
00142 bmdef->phone[i].info.cd.ctx[0] = mdef->phone[i].ci;
00143 bmdef->phone[i].info.cd.ctx[1] = mdef->phone[i].lc;
00144 bmdef->phone[i].info.cd.ctx[2] = mdef->phone[i].rc;
00145 }
00146 }
00147
00148
00149
00150 nodes = lc_idx = ci_idx = rc_idx = 0;
00151 for (i = 0; i < N_WORD_POSN; ++i) {
00152 int j;
00153 for (j = 0; j < mdef->n_ciphone; ++j) {
00154 ph_lc_t *lc;
00155
00156 for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
00157 ph_rc_t *rc;
00158 for (rc = lc->rclist; rc; rc = rc->next) {
00159 ++nodes;
00160 }
00161 ++nodes;
00162 ++rc_idx;
00163 }
00164 ++nodes;
00165 ++lc_idx;
00166 ++rc_idx;
00167 }
00168 ++nodes;
00169 ++ci_idx;
00170 ++lc_idx;
00171 ++rc_idx;
00172 }
00173 E_INFO("Allocating %d * %d bytes (%d KiB) for CD tree\n",
00174 nodes, sizeof(*bmdef->cd_tree),
00175 nodes * sizeof(*bmdef->cd_tree) / 1024);
00176 bmdef->n_cd_tree = nodes;
00177 bmdef->cd_tree = ckd_calloc(nodes, sizeof(*bmdef->cd_tree));
00178 for (i = 0; i < N_WORD_POSN; ++i) {
00179 int j;
00180
00181 bmdef->cd_tree[i].ctx = i;
00182 bmdef->cd_tree[i].n_down = mdef->n_ciphone;
00183 bmdef->cd_tree[i].c.down = ci_idx;
00184 #if 0
00185 E_INFO("%d => %c (%d@%d)\n",
00186 i, (WPOS_NAME)[i],
00187 bmdef->cd_tree[i].n_down, bmdef->cd_tree[i].c.down);
00188 #endif
00189
00190
00191 for (j = 0; j < mdef->n_ciphone; ++j) {
00192 ph_lc_t *lc;
00193
00194 bmdef->cd_tree[ci_idx].ctx = j;
00195 bmdef->cd_tree[ci_idx].c.down = lc_idx;
00196 for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) {
00197 ph_rc_t *rc;
00198
00199 bmdef->cd_tree[lc_idx].ctx = lc->lc;
00200 bmdef->cd_tree[lc_idx].c.down = rc_idx;
00201 for (rc = lc->rclist; rc; rc = rc->next) {
00202 bmdef->cd_tree[rc_idx].ctx = rc->rc;
00203 bmdef->cd_tree[rc_idx].n_down = 0;
00204 bmdef->cd_tree[rc_idx].c.pid = rc->pid;
00205 #if 0
00206 E_INFO("%d => %s %s %s %c (%d@%d)\n",
00207 rc_idx,
00208 bmdef->ciname[j],
00209 bmdef->ciname[lc->lc],
00210 bmdef->ciname[rc->rc],
00211 (WPOS_NAME)[i],
00212 bmdef->cd_tree[rc_idx].n_down,
00213 bmdef->cd_tree[rc_idx].c.down);
00214 #endif
00215
00216 ++bmdef->cd_tree[lc_idx].n_down;
00217 ++rc_idx;
00218 }
00219
00220
00221
00222 if (bmdef->cd_tree[lc_idx].n_down == 0)
00223 bmdef->cd_tree[lc_idx].c.pid = -1;
00224 #if 0
00225 E_INFO("%d => %s %s %c (%d@%d)\n",
00226 lc_idx,
00227 bmdef->ciname[j],
00228 bmdef->ciname[lc->lc],
00229 (WPOS_NAME)[i],
00230 bmdef->cd_tree[lc_idx].n_down,
00231 bmdef->cd_tree[lc_idx].c.down);
00232 #endif
00233
00234 ++bmdef->cd_tree[ci_idx].n_down;
00235 ++lc_idx;
00236 }
00237
00238
00239 if (bmdef->cd_tree[ci_idx].n_down == 0)
00240 bmdef->cd_tree[ci_idx].c.pid = -1;
00241 #if 0
00242 E_INFO("%d => %d=%s (%d@%d)\n",
00243 ci_idx, j, bmdef->ciname[j],
00244 bmdef->cd_tree[ci_idx].n_down,
00245 bmdef->cd_tree[ci_idx].c.down);
00246 #endif
00247
00248 ++ci_idx;
00249 }
00250 }
00251
00252 mdef_free(mdef);
00253
00254 bmdef->alloc_mode = BIN_MDEF_FROM_TEXT;
00255 return bmdef;
00256 }
00257
00258 bin_mdef_t *
00259 bin_mdef_retain(bin_mdef_t *m)
00260 {
00261 ++m->refcnt;
00262 return m;
00263 }
00264
00265 int
00266 bin_mdef_free(bin_mdef_t * m)
00267 {
00268 if (m == NULL)
00269 return 0;
00270 if (--m->refcnt > 0)
00271 return m->refcnt;
00272
00273 switch (m->alloc_mode) {
00274 case BIN_MDEF_FROM_TEXT:
00275 ckd_free(m->ciname[0]);
00276 ckd_free(m->sseq[0]);
00277 ckd_free(m->phone);
00278 ckd_free(m->cd_tree);
00279 break;
00280 case BIN_MDEF_IN_MEMORY:
00281 ckd_free(m->ciname[0]);
00282 break;
00283 case BIN_MDEF_ON_DISK:
00284 break;
00285 }
00286 if (m->filemap)
00287 mmio_file_unmap(m->filemap);
00288 ckd_free(m->cd2cisen);
00289 ckd_free(m->sen2cimap);
00290 ckd_free(m->ciname);
00291 ckd_free(m->sseq);
00292 ckd_free(m);
00293 return 0;
00294 }
00295
/*
 * Human-readable description of the binary file layout.  bin_mdef_write()
 * stores this text (including its terminating NUL, then zero padding) in
 * the file header, so every byte of it is part of the on-disk format.
 */
static const char format_desc[] =
    "BEGIN FILE FORMAT DESCRIPTION\n"
    "int32 n_ciphone; /**< Number of base (CI) phones */\n"
    "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n"
    "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n"
    "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n"
    "int32 n_sen; /**< Number of senones (CI+CD) */\n"
    "int32 n_tmat; /**< Number of transition matrices */\n"
    "int32 n_sseq; /**< Number of unique senone sequences */\n"
    "int32 n_ctx; /**< Number of phones of context */\n"
    "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n"
    "int32 sil; /**< CI phone ID for silence */\n"
    "char ciphones[][]; /**< CI phone strings (null-terminated) */\n"
    "char padding[]; /**< Padding to a 4-bytes boundary */\n"
    "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n"
    "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n"
    "int16 sseq[]; /**< Unique senone sequences */\n"
    "int8 sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n"
    "END FILE FORMAT DESCRIPTION\n";
00315
00316 bin_mdef_t *
00317 bin_mdef_read(cmd_ln_t *config, const char *filename)
00318 {
00319 bin_mdef_t *m;
00320 FILE *fh;
00321 size_t tree_start;
00322 int32 val, i, swap, pos, end;
00323 int32 *sseq_size;
00324 int do_mmap;
00325
00326
00327 if ((m = bin_mdef_read_text(config, filename)) != NULL)
00328 return m;
00329
00330 E_INFO("Reading binary model definition: %s\n", filename);
00331 if ((fh = fopen(filename, "rb")) == NULL)
00332 return NULL;
00333
00334 if (fread(&val, 4, 1, fh) != 1) {
00335 fclose(fh);
00336 E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n",
00337 filename);
00338 return NULL;
00339 }
00340 swap = 0;
00341 if (val == BIN_MDEF_OTHER_ENDIAN) {
00342 swap = 1;
00343 E_INFO("Must byte-swap %s\n", filename);
00344 }
00345 if (fread(&val, 4, 1, fh) != 1) {
00346 fclose(fh);
00347 E_ERROR_SYSTEM("Failed to read version from %s\n", filename);
00348 return NULL;
00349 }
00350 if (swap)
00351 SWAP_INT32(&val);
00352 if (val > BIN_MDEF_FORMAT_VERSION) {
00353 E_ERROR("File format version %d for %s is newer than library\n",
00354 val, filename);
00355 fclose(fh);
00356 return NULL;
00357 }
00358 if (fread(&val, 4, 1, fh) != 1) {
00359 fclose(fh);
00360 E_ERROR_SYSTEM("Failed to read header length from %s\n", filename);
00361 return NULL;
00362 }
00363 if (swap)
00364 SWAP_INT32(&val);
00365
00366 fseek(fh, val, SEEK_CUR);
00367
00368
00369 m = ckd_calloc(1, sizeof(*m));
00370 m->refcnt = 1;
00371
00372
00373 #define FREAD_SWAP32_CHK(dest) \
00374 if (fread((dest), 4, 1, fh) != 1) { \
00375 fclose(fh); \
00376 ckd_free(m); \
00377 E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \
00378 return NULL; \
00379 } \
00380 if (swap) SWAP_INT32(dest);
00381
00382 FREAD_SWAP32_CHK(&m->n_ciphone);
00383 FREAD_SWAP32_CHK(&m->n_phone);
00384 FREAD_SWAP32_CHK(&m->n_emit_state);
00385 FREAD_SWAP32_CHK(&m->n_ci_sen);
00386 FREAD_SWAP32_CHK(&m->n_sen);
00387 FREAD_SWAP32_CHK(&m->n_tmat);
00388 FREAD_SWAP32_CHK(&m->n_sseq);
00389 FREAD_SWAP32_CHK(&m->n_ctx);
00390 FREAD_SWAP32_CHK(&m->n_cd_tree);
00391 FREAD_SWAP32_CHK(&m->sil);
00392
00393
00394 m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname));
00395
00396
00397 do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE;
00398 if (swap) {
00399 E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n");
00400 do_mmap = FALSE;
00401 }
00402
00403 if (do_mmap) {
00404 m->filemap = mmio_file_read(filename);
00405 if (m->filemap == NULL)
00406 do_mmap = FALSE;
00407 }
00408 pos = ftell(fh);
00409 if (do_mmap) {
00410
00411 m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos;
00412
00413 m->alloc_mode = BIN_MDEF_ON_DISK;
00414 }
00415 else {
00416
00417 m->alloc_mode = BIN_MDEF_IN_MEMORY;
00418 fseek(fh, 0, SEEK_END);
00419 end = ftell(fh);
00420 fseek(fh, pos, SEEK_SET);
00421 m->ciname[0] = ckd_malloc(end - pos);
00422 if (fread(m->ciname[0], 1, end - pos, fh) != end - pos)
00423 E_FATAL_SYSTEM("Failed to read %d bytes of data from %s\n",
00424 end - pos, filename);
00425 }
00426
00427 for (i = 1; i < m->n_ciphone; ++i)
00428 m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1;
00429
00430
00431 tree_start =
00432 m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0];
00433 tree_start = (tree_start + 3) & ~3;
00434 m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start);
00435 if (swap) {
00436 for (i = 0; i < m->n_cd_tree; ++i) {
00437 SWAP_INT16(&m->cd_tree[i].ctx);
00438 SWAP_INT16(&m->cd_tree[i].n_down);
00439 SWAP_INT32(&m->cd_tree[i].c.down);
00440 }
00441 }
00442 m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree);
00443 if (swap) {
00444 for (i = 0; i < m->n_phone; ++i) {
00445 SWAP_INT32(&m->phone[i].ssid);
00446 SWAP_INT32(&m->phone[i].tmat);
00447 }
00448 }
00449 sseq_size = (int32 *) (m->phone + m->n_phone);
00450 if (swap)
00451 SWAP_INT32(sseq_size);
00452 m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq));
00453 m->sseq[0] = (uint16 *) (sseq_size + 1);
00454 if (swap) {
00455 for (i = 0; i < *sseq_size; ++i)
00456 SWAP_INT16(m->sseq[0] + i);
00457 }
00458 if (m->n_emit_state) {
00459 for (i = 1; i < m->n_sseq; ++i)
00460 m->sseq[i] = m->sseq[0] + i * m->n_emit_state;
00461 }
00462 else {
00463 m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size);
00464 for (i = 1; i < m->n_sseq; ++i)
00465 m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1];
00466 }
00467
00468
00469
00470
00471
00472 m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen));
00473 m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap));
00474
00475
00476 for (i = 0; i < m->n_ci_sen; ++i)
00477 m->cd2cisen[i] = i;
00478 for (; i < m->n_sen; ++i)
00479 m->cd2cisen[i] = -1;
00480 for (i = 0; i < m->n_sen; ++i)
00481 m->sen2cimap[i] = -1;
00482 for (i = 0; i < m->n_phone; ++i) {
00483 int32 j, ssid = m->phone[i].ssid;
00484
00485 for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) {
00486 int s = bin_mdef_sseq2sen(m, ssid, j);
00487 int ci = bin_mdef_pid2ci(m, i);
00488
00489 if (m->sen2cimap[s] == -1)
00490 m->sen2cimap[s] = ci;
00491 if (m->sen2cimap[s] != ci)
00492 E_WARN
00493 ("Senone %d is shared between multiple base phones\n",
00494 s);
00495
00496 if (j > bin_mdef_n_emit_state_phone(m, ci))
00497 E_WARN("CD phone %d has fewer states than CI phone %d\n",
00498 i, ci);
00499 else
00500 m->cd2cisen[s] =
00501 bin_mdef_sseq2sen(m, m->phone[ci].ssid, j);
00502 }
00503 }
00504
00505
00506 m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE);
00507
00508 E_INFO
00509 ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n",
00510 m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state,
00511 m->n_ci_sen, m->n_sen, m->n_sseq);
00512 fclose(fh);
00513 return m;
00514 }
00515
00516 int
00517 bin_mdef_write(bin_mdef_t * m, const char *filename)
00518 {
00519 FILE *fh;
00520 int32 val, i;
00521
00522 if ((fh = fopen(filename, "wb")) == NULL)
00523 return -1;
00524
00525
00526 val = BIN_MDEF_NATIVE_ENDIAN;
00527 fwrite(&val, 1, 4, fh);
00528
00529 val = BIN_MDEF_FORMAT_VERSION;
00530 fwrite(&val, 1, sizeof(val), fh);
00531
00532
00533 val = ((sizeof(format_desc) + 3) & ~3);
00534 fwrite(&val, 1, sizeof(val), fh);
00535 fwrite(format_desc, 1, sizeof(format_desc), fh);
00536
00537 i = 0;
00538 fwrite(&i, 1, val - sizeof(format_desc), fh);
00539
00540
00541 fwrite(&m->n_ciphone, 4, 1, fh);
00542 fwrite(&m->n_phone, 4, 1, fh);
00543 fwrite(&m->n_emit_state, 4, 1, fh);
00544 fwrite(&m->n_ci_sen, 4, 1, fh);
00545 fwrite(&m->n_sen, 4, 1, fh);
00546 fwrite(&m->n_tmat, 4, 1, fh);
00547 fwrite(&m->n_sseq, 4, 1, fh);
00548 fwrite(&m->n_ctx, 4, 1, fh);
00549 fwrite(&m->n_cd_tree, 4, 1, fh);
00550
00551
00552
00553
00554 val = m->sil;
00555 fwrite(&val, 4, 1, fh);
00556
00557
00558 for (i = 0; i < m->n_ciphone; ++i)
00559 fwrite(m->ciname[i], 1, strlen(m->ciname[i]) + 1, fh);
00560
00561 val = (ftell(fh) + 3) & ~3;
00562 i = 0;
00563 fwrite(&i, 1, val - ftell(fh), fh);
00564
00565
00566 fwrite(m->cd_tree, sizeof(*m->cd_tree), m->n_cd_tree, fh);
00567
00568 fwrite(m->phone, sizeof(*m->phone), m->n_phone, fh);
00569 if (m->n_emit_state) {
00570
00571 val = m->n_sseq * m->n_emit_state;
00572 fwrite(&val, 4, 1, fh);
00573
00574
00575 fwrite(m->sseq[0], sizeof(**m->sseq),
00576 m->n_sseq * m->n_emit_state, fh);
00577 }
00578 else {
00579 int32 n;
00580
00581
00582 n = 0;
00583 for (i = 0; i < m->n_sseq; ++i)
00584 n += m->sseq_len[i];
00585
00586
00587 fwrite(&n, 4, 1, fh);
00588
00589
00590 fwrite(m->sseq[0], sizeof(**m->sseq), n, fh);
00591
00592
00593 fwrite(m->sseq_len, 1, m->n_sseq, fh);
00594 }
00595 fclose(fh);
00596
00597 return 0;
00598 }
00599
00600 int
00601 bin_mdef_write_text(bin_mdef_t * m, const char *filename)
00602 {
00603 FILE *fh;
00604 int p, i, n_total_state;
00605
00606 if (strcmp(filename, "-") == 0)
00607 fh = stdout;
00608 else {
00609 if ((fh = fopen(filename, "w")) == NULL)
00610 return -1;
00611 }
00612
00613 fprintf(fh, "0.3\n");
00614 fprintf(fh, "%d n_base\n", m->n_ciphone);
00615 fprintf(fh, "%d n_tri\n", m->n_phone - m->n_ciphone);
00616 if (m->n_emit_state)
00617 n_total_state = m->n_phone * (m->n_emit_state + 1);
00618 else {
00619 n_total_state = 0;
00620 for (i = 0; i < m->n_phone; ++i)
00621 n_total_state += m->sseq_len[m->phone[i].ssid] + 1;
00622 }
00623 fprintf(fh, "%d n_state_map\n", n_total_state);
00624 fprintf(fh, "%d n_tied_state\n", m->n_sen);
00625 fprintf(fh, "%d n_tied_ci_state\n", m->n_ci_sen);
00626 fprintf(fh, "%d n_tied_tmat\n", m->n_tmat);
00627 fprintf(fh, "#\n# Columns definitions\n");
00628 fprintf(fh, "#%4s %3s %3s %1s %6s %4s %s\n",
00629 "base", "lft", "rt", "p", "attrib", "tmat",
00630 " ... state id's ...");
00631
00632 for (p = 0; p < m->n_ciphone; p++) {
00633 int n_state;
00634
00635 fprintf(fh, "%5s %3s %3s %1s", m->ciname[p], "-", "-", "-");
00636
00637 if (bin_mdef_is_fillerphone(m, p))
00638 fprintf(fh, " %6s", "filler");
00639 else
00640 fprintf(fh, " %6s", "n/a");
00641 fprintf(fh, " %4d", m->phone[p].tmat);
00642
00643 if (m->n_emit_state)
00644 n_state = m->n_emit_state;
00645 else
00646 n_state = m->sseq_len[m->phone[p].ssid];
00647 for (i = 0; i < n_state; i++) {
00648 fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
00649 }
00650 fprintf(fh, " N\n");
00651 }
00652
00653
00654 for (; p < m->n_phone; p++) {
00655 int n_state;
00656
00657 fprintf(fh, "%5s %3s %3s %c",
00658 m->ciname[m->phone[p].info.cd.ctx[0]],
00659 m->ciname[m->phone[p].info.cd.ctx[1]],
00660 m->ciname[m->phone[p].info.cd.ctx[2]],
00661 (WPOS_NAME)[m->phone[p].info.cd.wpos]);
00662
00663 if (bin_mdef_is_fillerphone(m, p))
00664 fprintf(fh, " %6s", "filler");
00665 else
00666 fprintf(fh, " %6s", "n/a");
00667 fprintf(fh, " %4d", m->phone[p].tmat);
00668
00669
00670 if (m->n_emit_state)
00671 n_state = m->n_emit_state;
00672 else
00673 n_state = m->sseq_len[m->phone[p].ssid];
00674 for (i = 0; i < n_state; i++) {
00675 fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]);
00676 }
00677 fprintf(fh, " N\n");
00678 }
00679
00680 if (strcmp(filename, "-") != 0)
00681 fclose(fh);
00682 return 0;
00683 }
00684
00685 int
00686 bin_mdef_ciphone_id(bin_mdef_t * m, const char *ciphone)
00687 {
00688 int low, mid, high;
00689
00690
00691 low = 0;
00692 high = m->n_ciphone;
00693 while (low < high) {
00694 int c;
00695
00696 mid = (low + high) / 2;
00697 c = strcmp(ciphone, m->ciname[mid]);
00698 if (c == 0)
00699 return mid;
00700 else if (c > 0)
00701 low = mid + 1;
00702 else if (c < 0)
00703 high = mid;
00704 }
00705 return -1;
00706 }
00707
00708 int
00709 bin_mdef_ciphone_id_nocase(bin_mdef_t * m, const char *ciphone)
00710 {
00711 int low, mid, high;
00712
00713
00714 low = 0;
00715 high = m->n_ciphone;
00716 while (low < high) {
00717 int c;
00718
00719 mid = (low + high) / 2;
00720 c = strcmp_nocase(ciphone, m->ciname[mid]);
00721 if (c == 0)
00722 return mid;
00723 else if (c > 0)
00724 low = mid + 1;
00725 else if (c < 0)
00726 high = mid;
00727 }
00728 return -1;
00729 }
00730
00731 const char *
00732 bin_mdef_ciphone_str(bin_mdef_t * m, int32 ci)
00733 {
00734 assert(m != NULL);
00735 assert(ci < m->n_ciphone);
00736 return m->ciname[ci];
00737 }
00738
00739 int
00740 bin_mdef_phone_id(bin_mdef_t * m, int32 ci, int32 lc, int32 rc, int32 wpos)
00741 {
00742 cd_tree_t *cd_tree;
00743 int level, max;
00744 int16 ctx[4];
00745
00746 assert(m);
00747
00748
00749
00750 if (lc < 0 || rc < 0)
00751 return ci;
00752
00753 assert((ci >= 0) && (ci < m->n_ciphone));
00754 assert((lc >= 0) && (lc < m->n_ciphone));
00755 assert((rc >= 0) && (rc < m->n_ciphone));
00756 assert((wpos >= 0) && (wpos < N_WORD_POSN));
00757
00758
00759 ctx[0] = wpos;
00760 ctx[1] = ci;
00761 ctx[2] = (m->sil >= 0
00762 && m->phone[lc].info.ci.filler) ? m->sil : lc;
00763 ctx[3] = (m->sil >= 0
00764 && m->phone[rc].info.ci.filler) ? m->sil : rc;
00765
00766
00767 cd_tree = m->cd_tree;
00768 level = 0;
00769 max = N_WORD_POSN;
00770 while (level < 4) {
00771 int i;
00772
00773 #if 0
00774 E_INFO("Looking for context %d=%s in %d at %d\n",
00775 ctx[level], m->ciname[ctx[level]],
00776 max, cd_tree - m->cd_tree);
00777 #endif
00778 for (i = 0; i < max; ++i) {
00779 #if 0
00780 E_INFO("Look at context %d=%s at %d\n",
00781 cd_tree[i].ctx,
00782 m->ciname[cd_tree[i].ctx], cd_tree + i - m->cd_tree);
00783 #endif
00784 if (cd_tree[i].ctx == ctx[level])
00785 break;
00786 }
00787 if (i == max)
00788 return -1;
00789 #if 0
00790 E_INFO("Found context %d=%s at %d, n_down=%d, down=%d\n",
00791 ctx[level], m->ciname[ctx[level]],
00792 cd_tree + i - m->cd_tree,
00793 cd_tree[i].n_down, cd_tree[i].c.down);
00794 #endif
00795
00796 if (cd_tree[i].n_down == 0)
00797 return cd_tree[i].c.pid;
00798
00799
00800 max = cd_tree[i].n_down;
00801 cd_tree = m->cd_tree + cd_tree[i].c.down;
00802 ++level;
00803 }
00804
00805 return -1;
00806 }
00807
00808 int
00809 bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos)
00810 {
00811 int p, tmppos;
00812
00813
00814
00815
00816
00817 if (l < 0 || r < 0)
00818 return b;
00819
00820 p = bin_mdef_phone_id(m, b, l, r, pos);
00821 if (p >= 0)
00822 return p;
00823
00824
00825 for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
00826 if (tmppos != pos) {
00827 p = bin_mdef_phone_id(m, b, l, r, tmppos);
00828 if (p >= 0)
00829 return p;
00830 }
00831 }
00832
00833
00834
00835 if (m->sil >= 0) {
00836 int newl = l, newr = r;
00837 if (m->phone[(int)l].info.ci.filler
00838 || pos == WORD_POSN_BEGIN || pos == WORD_POSN_SINGLE)
00839 newl = m->sil;
00840 if (m->phone[(int)r].info.ci.filler
00841 || pos == WORD_POSN_END || pos == WORD_POSN_SINGLE)
00842 newr = m->sil;
00843 if ((newl != l) || (newr != r)) {
00844 p = bin_mdef_phone_id(m, b, newl, newr, pos);
00845 if (p >= 0)
00846 return p;
00847
00848 for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) {
00849 if (tmppos != pos) {
00850 p = bin_mdef_phone_id(m, b, newl, newr, tmppos);
00851 if (p >= 0)
00852 return p;
00853 }
00854 }
00855 }
00856 }
00857
00858
00859 return b;
00860 }
00861
00862 int
00863 bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf)
00864 {
00865 char *wpos_name;
00866
00867 assert(m);
00868 assert((pid >= 0) && (pid < m->n_phone));
00869 wpos_name = WPOS_NAME;
00870
00871 buf[0] = '\0';
00872 if (pid < m->n_ciphone)
00873 sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid));
00874 else {
00875 sprintf(buf, "%s %s %s %c",
00876 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]),
00877 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]),
00878 bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]),
00879 wpos_name[m->phone[pid].info.cd.wpos]);
00880 }
00881 return 0;
00882 }