PocketSphinx  0.6
ptm_mgau.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 /* System headers */
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <math.h>
45 #if defined(__ADSPBLACKFIN__)
46 #elif !defined(_WIN32_WCE)
47 #include <sys/types.h>
48 #endif
49 
50 #ifndef M_PI
51 #define M_PI 3.14159265358979323846
52 #endif
53 
54 /* SphinxBase headers */
55 #include <sphinx_config.h>
56 #include <sphinxbase/cmd_ln.h>
57 #include <sphinxbase/fixpoint.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/bio.h>
60 #include <sphinxbase/err.h>
61 #include <sphinxbase/prim_type.h>
62 
63 /* Local headers */
64 #include "tied_mgau_common.h"
65 #include "ptm_mgau.h"
66 #include "posixwin32.h"
67 
68 static ps_mgaufuncs_t ptm_mgau_funcs = {
69  "ptm",
70  &ptm_mgau_frame_eval, /* frame_eval */
71  &ptm_mgau_mllr_transform, /* transform */
72  &ptm_mgau_free /* free */
73 };
74 
75 #define COMPUTE_GMM_MAP(_idx) \
76  diff[_idx] = obs[_idx] - mean[_idx]; \
77  sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \
78  compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]);
79 #define COMPUTE_GMM_REDUCE(_idx) \
80  d = GMMSUB(d, compl[_idx]);
81 
82 static void
83 insertion_sort_topn(ptm_topn_t *topn, int i, int32 d)
84 {
85  ptm_topn_t vtmp;
86  int j;
87 
88  topn[i].score = d;
89  if (i == 0)
90  return;
91  vtmp = topn[i];
92  for (j = i - 1; j >= 0 && d > topn[j].score; j--) {
93  topn[j + 1] = topn[j];
94  }
95  topn[j + 1] = vtmp;
96 }
97 
98 static int
99 eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
100 {
101  ptm_topn_t *topn;
102  int i, ceplen;
103 
104  topn = s->f->topn[cb][feat];
105  ceplen = s->g->featlen[feat];
106 
107  for (i = 0; i < s->max_topn; i++) {
108  mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
109  mfcc_t *var, d;
110  mfcc_t *obs;
111  int32 cw, j;
112 
113  cw = topn[i].cw;
114  mean = s->g->mean[cb][feat][0] + cw * ceplen;
115  var = s->g->var[cb][feat][0] + cw * ceplen;
116  d = s->g->det[cb][feat][cw];
117  obs = z;
118  for (j = 0; j < ceplen % 4; ++j) {
119  diff[0] = *obs++ - *mean++;
120  sqdiff[0] = MFCCMUL(diff[0], diff[0]);
121  compl[0] = MFCCMUL(sqdiff[0], *var);
122  d = GMMSUB(d, compl[0]);
123  ++var;
124  }
125  /* We could vectorize this but it's unlikely to make much
126  * difference as the outer loop here isn't very big. */
127  for (;j < ceplen; j += 4) {
128  COMPUTE_GMM_MAP(0);
129  COMPUTE_GMM_MAP(1);
130  COMPUTE_GMM_MAP(2);
131  COMPUTE_GMM_MAP(3);
132  COMPUTE_GMM_REDUCE(0);
133  COMPUTE_GMM_REDUCE(1);
134  COMPUTE_GMM_REDUCE(2);
135  COMPUTE_GMM_REDUCE(3);
136  var += 4;
137  obs += 4;
138  mean += 4;
139  }
140  insertion_sort_topn(topn, i, (int32)d);
141  }
142 
143  return topn[0].score;
144 }
145 
146 /* This looks bad, but it actually isn't. Less than 1% of eval_cb's
147  * time is spent doing this. */
148 static void
149 insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best,
150  int cw, int32 intd)
151 {
152  for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur)
153  memcpy(*cur + 1, *cur, sizeof(**cur));
154  ++*cur;
155  (*cur)->cw = cw;
156  (*cur)->score = intd;
157 }
158 
159 static int
160 eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z)
161 {
162  ptm_topn_t *worst, *best, *topn;
163  mfcc_t *mean;
164  mfcc_t *var, *det, *detP, *detE;
165  int32 i, ceplen;
166 
167  best = topn = s->f->topn[cb][feat];
168  worst = topn + (s->max_topn - 1);
169  mean = s->g->mean[cb][feat][0];
170  var = s->g->var[cb][feat][0];
171  det = s->g->det[cb][feat];
172  detE = det + s->g->n_density;
173  ceplen = s->g->featlen[feat];
174 
175  for (detP = det; detP < detE; ++detP) {
176  mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */
177  mfcc_t d, thresh;
178  mfcc_t *obs;
179  ptm_topn_t *cur;
180  int32 cw, j;
181 
182  d = *detP;
183  thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */
184  obs = z;
185  cw = detP - det;
186 
187  /* Unroll the loop starting with the first dimension(s). In
188  * theory this might be a bit faster if this Gaussian gets
189  * "knocked out" by C0. In practice not. */
190  for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) {
191  diff[0] = *obs++ - *mean++;
192  sqdiff[0] = MFCCMUL(diff[0], diff[0]);
193  compl[0] = MFCCMUL(sqdiff[0], *var++);
194  d = GMMSUB(d, compl[0]);
195  }
196  /* Now do 4 dimensions at a time. You'd think that GCC would
197  * vectorize this? Apparently not. And it's right, because
198  * that won't make this any faster, at least on x86-64. */
199  for (; j < ceplen && d >= thresh; j += 4) {
200  COMPUTE_GMM_MAP(0);
201  COMPUTE_GMM_MAP(1);
202  COMPUTE_GMM_MAP(2);
203  COMPUTE_GMM_MAP(3);
204  COMPUTE_GMM_REDUCE(0);
205  COMPUTE_GMM_REDUCE(1);
206  COMPUTE_GMM_REDUCE(2);
207  COMPUTE_GMM_REDUCE(3);
208  var += 4;
209  obs += 4;
210  mean += 4;
211  }
212  if (j < ceplen) {
213  /* terminated early, so not in topn */
214  mean += (ceplen - j);
215  var += (ceplen - j);
216  continue;
217  }
218  if (d < thresh)
219  continue;
220  for (i = 0; i < s->max_topn; i++) {
221  /* already there, so don't need to insert */
222  if (topn[i].cw == cw)
223  break;
224  }
225  if (i < s->max_topn)
226  continue; /* already there. Don't insert */
227  insertion_sort_cb(&cur, worst, best, cw, (int32)d);
228  }
229 
230  return best->score;
231 }
232 
236 static int
237 ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame)
238 {
239  int i, j;
240 
241  /* First evaluate top-N from previous frame. */
242  for (i = 0; i < s->g->n_mgau; ++i)
243  for (j = 0; j < s->g->n_feat; ++j)
244  eval_topn(s, i, j, z[j]);
245 
246  /* If frame downsampling is in effect, possibly do nothing else. */
247  if (frame % s->ds_ratio)
248  return 0;
249 
250  /* Evaluate remaining codebooks. */
251  for (i = 0; i < s->g->n_mgau; ++i) {
252  if (bitvec_is_clear(s->f->mgau_active, i))
253  continue;
254  for (j = 0; j < s->g->n_feat; ++j) {
255  eval_cb(s, i, j, z[j]);
256  }
257  }
258 
259  /* Normalize densities to produce "posterior probabilities",
260  * i.e. things with a reasonable dynamic range, then scale and
261  * clamp them to the acceptable range. This is actually done
262  * solely to ensure that we can use fast_logmath_add(). Note that
263  * unless we share the same normalizer across all codebooks for
264  * each feature stream we get defective scores (that's why these
265  * loops are inside out - doing it per-feature should give us
266  * greater precision). */
267  for (j = 0; j < s->g->n_feat; ++j) {
268  int32 norm = 0x7fffffff;
269  for (i = 0; i < s->g->n_mgau; ++i) {
270  if (bitvec_is_clear(s->f->mgau_active, i))
271  continue;
272  if (norm > s->f->topn[i][j][0].score >> SENSCR_SHIFT)
273  norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT;
274  }
275  assert(norm != 0x7fffffff);
276  for (i = 0; i < s->g->n_mgau; ++i) {
277  int32 k;
278  if (bitvec_is_clear(s->f->mgau_active, i))
279  continue;
280  for (k = 0; k < s->max_topn; ++k) {
281  s->f->topn[i][j][k].score >>= SENSCR_SHIFT;
282  s->f->topn[i][j][k].score -= norm;
283  s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score;
284  if (s->f->topn[i][j][k].score > MAX_NEG_ASCR)
285  s->f->topn[i][j][k].score = MAX_NEG_ASCR;
286  }
287  }
288  }
289 
290  return 0;
291 }
292 
293 static int
294 ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active,
295  int32 n_senone_active, int compallsen)
296 {
297  int i, lastsen;
298 
299  if (compallsen) {
300  bitvec_set_all(s->f->mgau_active, s->g->n_mgau);
301  return 0;
302  }
303  bitvec_clear_all(s->f->mgau_active, s->g->n_mgau);
304  for (lastsen = i = 0; i < n_senone_active; ++i) {
305  int sen = senone_active[i] + lastsen;
306  int cb = s->sen2cb[sen];
307  bitvec_set(s->f->mgau_active, cb);
308  lastsen = sen;
309  }
310  E_DEBUG(1, ("Active codebooks:"));
311  for (i = 0; i < s->g->n_mgau; ++i) {
312  if (bitvec_is_clear(s->f->mgau_active, i))
313  continue;
314  E_DEBUGCONT(1, (" %d", i));
315  }
316  E_DEBUGCONT(1, ("\n"));
317  return 0;
318 }
319 
323 static int
324 ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores,
325  uint8 *senone_active, int32 n_senone_active,
326  int compall)
327 {
328  int i, lastsen, bestscore;
329 
330  memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores));
331  /* FIXME: This is the non-cache-efficient way to do this. We want
332  * to evaluate one codeword at a time but this requires us to have
333  * a reverse codebook to senone mapping, which we don't have
334  * (yet), since different codebooks have different top-N
335  * codewords. */
336  if (compall)
337  n_senone_active = s->n_sen;
338  bestscore = 0x7fffffff;
339  for (lastsen = i = 0; i < n_senone_active; ++i) {
340  int sen, f, cb;
341  int ascore;
342 
343  if (compall)
344  sen = i;
345  else
346  sen = senone_active[i] + lastsen;
347  lastsen = sen;
348  cb = s->sen2cb[sen];
349 
350  if (bitvec_is_clear(s->f->mgau_active, cb)) {
351  int j;
352  /* Because senone_active is deltas we can't really "knock
353  * out" senones from pruned codebooks, and in any case,
354  * it wouldn't make any difference to the search code,
355  * which doesn't expect senone_active to change. */
356  for (f = 0; f < s->g->n_feat; ++f) {
357  for (j = 0; j < s->max_topn; ++j) {
358  s->f->topn[cb][f][j].score = MAX_NEG_ASCR;
359  }
360  }
361  }
362  /* For each feature, log-sum codeword scores + mixw to get
363  * feature density, then sum (multiply) to get ascore */
364  ascore = 0;
365  for (f = 0; f < s->g->n_feat; ++f) {
366  ptm_topn_t *topn;
367  int j, fden = 0;
368  topn = s->f->topn[cb][f];
369  for (j = 0; j < s->max_topn; ++j) {
370  int mixw;
371  /* Find mixture weight for this codeword. */
372  if (s->mixw_cb) {
373  int dcw = s->mixw[f][topn[j].cw][sen/2];
374  dcw = (dcw & 1) ? dcw >> 4 : dcw & 0x0f;
375  mixw = s->mixw_cb[dcw];
376  }
377  else {
378  mixw = s->mixw[f][topn[j].cw][sen];
379  }
380  if (j == 0)
381  fden = mixw + topn[j].score;
382  else
383  fden = fast_logmath_add(s->lmath_8b, fden,
384  mixw + topn[j].score);
385  E_DEBUG(3, ("fden[%d][%d] l+= %d + %d = %d\n",
386  sen, f, mixw, topn[j].score, fden));
387  }
388  ascore += fden;
389  }
390  if (ascore < bestscore) bestscore = ascore;
391  senone_scores[sen] = ascore;
392  }
393  /* Normalize the scores again (finishing the job we started above
394  * in ptm_mgau_codebook_eval...) */
395  for (i = 0; i < s->n_sen; ++i) {
396  senone_scores[i] -= bestscore;
397  }
398 
399  return 0;
400 }
401 
405 int32
406 ptm_mgau_frame_eval(ps_mgau_t *ps,
407  int16 *senone_scores,
408  uint8 *senone_active,
409  int32 n_senone_active,
410  mfcc_t ** featbuf, int32 frame,
411  int32 compallsen)
412 {
413  ptm_mgau_t *s = (ptm_mgau_t *)ps;
414  int fast_eval_idx;
415 
416  /* Find the appropriate frame in the rotating history buffer
417  * corresponding to the requested input frame. No bounds checking
418  * is done here, which just means you'll get semi-random crap if
419  * you request a frame in the future or one that's too far in the
420  * past. Since the history buffer is just used for fast match
421  * that might not be fatal. */
422  fast_eval_idx = frame % s->n_fast_hist;
423  s->f = s->hist + fast_eval_idx;
424  /* Compute the top-N codewords for every codebook, unless this
425  * is a past frame, in which case we already have them (we
426  * hope!) */
427  if (frame >= ps_mgau_base(ps)->frame_idx) {
428  ptm_fast_eval_t *lastf;
429  /* Get the previous frame's top-N information (on the
430  * first frame of the input this is just all WORST_DIST,
431  * no harm in that) */
432  if (fast_eval_idx == 0)
433  lastf = s->hist + s->n_fast_hist - 1;
434  else
435  lastf = s->hist + fast_eval_idx - 1;
436  /* Copy in initial top-N info */
437  memcpy(s->f->topn[0][0], lastf->topn[0][0],
438  s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t));
439  /* Generate initial active codebook list (this might not be
440  * necessary) */
441  ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen);
442  /* Now evaluate top-N, prune, and evaluate remaining codebooks. */
443  ptm_mgau_codebook_eval(s, featbuf, frame);
444  }
445  /* Evaluate intersection of active senones and active codebooks. */
446  ptm_mgau_senone_eval(s, senone_scores, senone_active,
447  n_senone_active, compallsen);
448 
449  return 0;
450 }
451 
452 static int32
453 read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file)
454 {
455  FILE *fp;
456  char line[1000];
457  int32 i, n, r, c;
458  int32 do_swap, do_mmap;
459  size_t filesize, offset;
460  int n_clust = 0;
461  int n_feat = s->g->n_feat;
462  int n_density = s->g->n_density;
463  int n_sen = bin_mdef_n_sen(mdef);
464  int n_bits = 8;
465 
466  s->n_sen = n_sen; /* FIXME: Should have been done earlier */
467  do_mmap = cmd_ln_boolean_r(s->config, "-mmap");
468 
469  if ((fp = fopen(file, "rb")) == NULL)
470  return -1;
471 
472  E_INFO("Loading senones from dump file %s\n", file);
473  /* Read title size, title */
474  if (fread(&n, sizeof(int32), 1, fp) != 1) {
475  E_ERROR_SYSTEM("Failed to read title size from %s", file);
476  goto error_out;
477  }
478  /* This is extremely bogus */
479  do_swap = 0;
480  if (n < 1 || n > 999) {
481  SWAP_INT32(&n);
482  if (n < 1 || n > 999) {
483  E_ERROR("Title length %x in dump file %s out of range\n", n, file);
484  goto error_out;
485  }
486  do_swap = 1;
487  }
488  if (fread(line, sizeof(char), n, fp) != n) {
489  E_ERROR_SYSTEM("Cannot read title");
490  goto error_out;
491  }
492  if (line[n - 1] != '\0') {
493  E_ERROR("Bad title in dump file\n");
494  goto error_out;
495  }
496  E_INFO("%s\n", line);
497 
498  /* Read header size, header */
499  if (fread(&n, sizeof(n), 1, fp) != 1) {
500  E_ERROR_SYSTEM("Failed to read header size from %s", file);
501  goto error_out;
502  }
503  if (do_swap) SWAP_INT32(&n);
504  if (fread(line, sizeof(char), n, fp) != n) {
505  E_ERROR_SYSTEM("Cannot read header");
506  goto error_out;
507  }
508  if (line[n - 1] != '\0') {
509  E_ERROR("Bad header in dump file\n");
510  goto error_out;
511  }
512 
513  /* Read other header strings until string length = 0 */
514  for (;;) {
515  if (fread(&n, sizeof(n), 1, fp) != 1) {
516  E_ERROR_SYSTEM("Failed to read header string size from %s", file);
517  goto error_out;
518  }
519  if (do_swap) SWAP_INT32(&n);
520  if (n == 0)
521  break;
522  if (fread(line, sizeof(char), n, fp) != n) {
523  E_ERROR_SYSTEM("Cannot read header");
524  goto error_out;
525  }
526  /* Look for a cluster count, if present */
527  if (!strncmp(line, "feature_count ", strlen("feature_count "))) {
528  n_feat = atoi(line + strlen("feature_count "));
529  }
530  if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) {
531  n_density = atoi(line + strlen("mixture_count "));
532  }
533  if (!strncmp(line, "model_count ", strlen("model_count "))) {
534  n_sen = atoi(line + strlen("model_count "));
535  }
536  if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) {
537  n_clust = atoi(line + strlen("cluster_count "));
538  }
539  if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) {
540  n_bits = atoi(line + strlen("cluster_bits "));
541  }
542  }
543 
544  /* Defaults for #rows, #columns in mixw array. */
545  c = n_sen;
546  r = n_density;
547  if (n_clust == 0) {
548  /* Older mixw files have them here, and they might be padded. */
549  if (fread(&r, sizeof(r), 1, fp) != 1) {
550  E_ERROR_SYSTEM("Cannot read #rows");
551  goto error_out;
552  }
553  if (do_swap) SWAP_INT32(&r);
554  if (fread(&c, sizeof(c), 1, fp) != 1) {
555  E_ERROR_SYSTEM("Cannot read #columns");
556  goto error_out;
557  }
558  if (do_swap) SWAP_INT32(&c);
559  E_INFO("Rows: %d, Columns: %d\n", r, c);
560  }
561 
562  if (n_feat != s->g->n_feat) {
563  E_ERROR("Number of feature streams mismatch: %d != %d\n",
564  n_feat, s->g->n_feat);
565  goto error_out;
566  }
567  if (n_density != s->g->n_density) {
568  E_ERROR("Number of densities mismatch: %d != %d\n",
569  n_density, s->g->n_density);
570  goto error_out;
571  }
572  if (n_sen != s->n_sen) {
573  E_ERROR("Number of senones mismatch: %d != %d\n",
574  n_sen, s->n_sen);
575  goto error_out;
576  }
577 
578  if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) {
579  E_ERROR("Cluster count must be 0, 15, or 16\n");
580  goto error_out;
581  }
582  if (n_clust == 15)
583  ++n_clust;
584 
585  if (!((n_bits == 8) || (n_bits == 4))) {
586  E_ERROR("Cluster count must be 4 or 8\n");
587  goto error_out;
588  }
589 
590  if (do_mmap) {
591  E_INFO("Using memory-mapped I/O for senones\n");
592  }
593  offset = ftell(fp);
594  fseek(fp, 0, SEEK_END);
595  filesize = ftell(fp);
596  fseek(fp, offset, SEEK_SET);
597 
598  /* Allocate memory for pdfs (or memory map them) */
599  if (do_mmap) {
600  s->sendump_mmap = mmio_file_read(file);
601  /* Get cluster codebook if any. */
602  if (n_clust) {
603  s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
604  offset += n_clust;
605  }
606  }
607  else {
608  /* Get cluster codebook if any. */
609  if (n_clust) {
610  s->mixw_cb = ckd_calloc(1, n_clust);
611  if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) {
612  E_ERROR("Failed to read %d bytes from sendump\n", n_clust);
613  goto error_out;
614  }
615  }
616  }
617 
618  /* Set up pointers, or read, or whatever */
619  if (s->sendump_mmap) {
620  s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw));
621  for (n = 0; n < n_feat; n++) {
622  int step = c;
623  if (n_bits == 4)
624  step = (step + 1) / 2;
625  for (i = 0; i < r; i++) {
626  s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset;
627  offset += step;
628  }
629  }
630  }
631  else {
632  s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw));
633  /* Read pdf values and ids */
634  for (n = 0; n < n_feat; n++) {
635  int step = c;
636  if (n_bits == 4)
637  step = (step + 1) / 2;
638  for (i = 0; i < r; i++) {
639  if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp)
640  != (size_t) step) {
641  E_ERROR("Failed to read %d bytes from sendump\n", step);
642  goto error_out;
643  }
644  }
645  }
646  }
647 
648  fclose(fp);
649  return 0;
650 error_out:
651  fclose(fp);
652  return -1;
653 }
654 
655 static int32
656 read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin)
657 {
658  char **argname, **argval;
659  char eofchk;
660  FILE *fp;
661  int32 byteswap, chksum_present;
662  uint32 chksum;
663  float32 *pdf;
664  int32 i, f, c, n;
665  int32 n_sen;
666  int32 n_feat;
667  int32 n_comp;
668  int32 n_err;
669 
670  E_INFO("Reading mixture weights file '%s'\n", file_name);
671 
672  if ((fp = fopen(file_name, "rb")) == NULL)
673  E_FATAL("Failed to open mixture file '%s' for reading: %s\n", file_name, strerror(errno));
674 
675  /* Read header, including argument-value info and 32-bit byteorder magic */
676  if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
677  E_FATAL("Failed to read header from '%s'\n", file_name);
678 
679  /* Parse argument-value list */
680  chksum_present = 0;
681  for (i = 0; argname[i]; i++) {
682  if (strcmp(argname[i], "version") == 0) {
683  if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
684  E_WARN("Version mismatch(%s): %s, expecting %s\n",
685  file_name, argval[i], MGAU_MIXW_VERSION);
686  }
687  else if (strcmp(argname[i], "chksum0") == 0) {
688  chksum_present = 1; /* Ignore the associated value */
689  }
690  }
691  bio_hdrarg_free(argname, argval);
692  argname = argval = NULL;
693 
694  chksum = 0;
695 
696  /* Read #senones, #features, #codewords, arraysize */
697  if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
698  || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) !=
699  1)
700  || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) !=
701  1)
702  || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
703  E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
704  }
705  if (n_feat != s->g->n_feat)
706  E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat);
707  if (n != n_sen * n_feat * n_comp) {
708  E_FATAL
709  ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
710  file_name, i, n_sen, n_feat, n_comp);
711  }
712 
713  /* n_sen = number of mixture weights per codeword, which is
714  * fixed at the number of senones since we have only one codebook.
715  */
716  s->n_sen = n_sen;
717 
718  /* Quantized mixture weight arrays. */
719  s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density,
720  n_sen, sizeof(***s->mixw));
721 
722  /* Temporary structure to read in floats before conversion to (int32) logs3 */
723  pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));
724 
725  /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
726  n_err = 0;
727  for (i = 0; i < n_sen; i++) {
728  for (f = 0; f < n_feat; f++) {
729  if (bio_fread((void *) pdf, sizeof(float32),
730  n_comp, fp, byteswap, &chksum) != n_comp) {
731  E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
732  }
733 
734  /* Normalize and floor */
735  if (vector_sum_norm(pdf, n_comp) <= 0.0)
736  n_err++;
737  vector_floor(pdf, n_comp, SmoothMin);
738  vector_sum_norm(pdf, n_comp);
739 
740  /* Convert to LOG, quantize, and transpose */
741  for (c = 0; c < n_comp; c++) {
742  int32 qscr;
743 
744  qscr = -logmath_log(s->lmath_8b, pdf[c]);
745  if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
746  qscr = MAX_NEG_MIXW;
747  s->mixw[f][c][i] = qscr;
748  }
749  }
750  }
751  if (n_err > 0)
752  E_WARN("Weight normalization failed for %d senones\n", n_err);
753 
754  ckd_free(pdf);
755 
756  if (chksum_present)
757  bio_verify_chksum(fp, byteswap, chksum);
758 
759  if (fread(&eofchk, 1, 1, fp) == 1)
760  E_FATAL("More data than expected in %s\n", file_name);
761 
762  fclose(fp);
763 
764  E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
765  return n_sen;
766 }
767 
768 ps_mgau_t *
769 ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef)
770 {
771  ptm_mgau_t *s;
772  ps_mgau_t *ps;
773  char const *sendump_path;
774  int i;
775 
776  s = ckd_calloc(1, sizeof(*s));
777  s->config = acmod->config;
778 
779  s->lmath = logmath_retain(acmod->lmath);
780  /* Log-add table. */
781  s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE);
782  if (s->lmath_8b == NULL)
783  goto error_out;
784  /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */
785  if (logmath_get_width(s->lmath_8b) != 1) {
786  E_ERROR("Log base %f is too small to represent add table in 8 bits\n",
787  logmath_get_base(s->lmath_8b));
788  goto error_out;
789  }
790 
791  /* Read means and variances. */
792  if ((s->g = gauden_init(cmd_ln_str_r(s->config, "-mean"),
793  cmd_ln_str_r(s->config, "-var"),
794  cmd_ln_float32_r(s->config, "-varfloor"),
795  s->lmath)) == NULL)
796  goto error_out;
797  /* We only support 256 codebooks or less (like 640k or 2GB, this
798  * should be enough for anyone) */
799  if (s->g->n_mgau > 256) {
800  E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau);
801  goto error_out;
802  }
803  if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) {
804  E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef));
805  goto error_out;
806  }
807  /* Verify n_feat and veclen, against acmod. */
808  if (s->g->n_feat != feat_dimension1(acmod->fcb)) {
809  E_ERROR("Number of streams does not match: %d != %d\n",
810  s->g->n_feat, feat_dimension(acmod->fcb));
811  goto error_out;
812  }
813  for (i = 0; i < s->g->n_feat; ++i) {
814  if (s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
815  E_ERROR("Dimension of stream %d does not match: %d != %d\n",
816  s->g->featlen[i], feat_dimension2(acmod->fcb, i));
817  goto error_out;
818  }
819  }
820  /* Read mixture weights. */
821  if ((sendump_path = cmd_ln_str_r(s->config, "-sendump"))) {
822  if (read_sendump(s, acmod->mdef, sendump_path) < 0) {
823  goto error_out;
824  }
825  }
826  else {
827  if (read_mixw(s, cmd_ln_str_r(s->config, "-mixw"),
828  cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) {
829  goto error_out;
830  }
831  }
832  s->ds_ratio = cmd_ln_int32_r(s->config, "-ds");
833  s->max_topn = cmd_ln_int32_r(s->config, "-topn");
834  E_INFO("Maximum top-N: %d\n", s->max_topn);
835 
836  /* Assume mapping of senones to their base phones, though this
837  * will become more flexible in the future. */
838  s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb));
839  for (i = 0; i < s->n_sen; ++i)
840  s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i);
841 
842  /* Allocate fast-match history buffers. We need enough for the
843  * phoneme lookahead window, plus the current frame, plus one for
844  * good measure? (FIXME: I don't remember why) */
845  s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2;
846  s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist));
847  /* s->f will be a rotating pointer into s->hist. */
848  s->f = s->hist;
849  for (i = 0; i < s->n_fast_hist; ++i) {
850  int j, k, m;
851  /* Top-N codewords for every codebook and feature. */
852  s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat,
853  s->max_topn, sizeof(ptm_topn_t));
854  /* Initialize them to sane (yet arbitrary) defaults. */
855  for (j = 0; j < s->g->n_mgau; ++j) {
856  for (k = 0; k < s->g->n_feat; ++k) {
857  for (m = 0; m < s->max_topn; ++m) {
858  s->hist[i].topn[j][k][m].cw = m;
859  s->hist[i].topn[j][k][m].score = WORST_DIST;
860  }
861  }
862  }
863  /* Active codebook mapping (just codebook, not features,
864  at least not yet) */
865  s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau);
866  /* Start with them all on, prune them later. */
867  bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau);
868  }
869 
870  ps = (ps_mgau_t *)s;
871  ps->vt = &ptm_mgau_funcs;
872  return ps;
873 error_out:
874  ptm_mgau_free(ps_mgau_base(s));
875  return NULL;
876 }
877 
878 int
879 ptm_mgau_mllr_transform(ps_mgau_t *ps,
880  ps_mllr_t *mllr)
881 {
882  ptm_mgau_t *s = (ptm_mgau_t *)ps;
883  return gauden_mllr_transform(s->g, mllr, s->config);
884 }
885 
886 void
887 ptm_mgau_free(ps_mgau_t *ps)
888 {
889  ptm_mgau_t *s = (ptm_mgau_t *)ps;
890 
891  logmath_free(s->lmath);
892  logmath_free(s->lmath_8b);
893  if (s->sendump_mmap) {
894  ckd_free_2d(s->mixw);
895  mmio_file_unmap(s->sendump_mmap);
896  }
897  else {
898  ckd_free_3d(s->mixw);
899  }
900  ckd_free(s->sen2cb);
901  gauden_free(s->g);
902  ckd_free(s);
903 }