SphinxBase 0.6

src/libsphinxbase/feat/lda.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2006 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * lda.c -- Read and apply LDA matrices to features.
00039  *
00040  * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
00041  */
00042 
00043 #include <assert.h>
00044 #include <string.h>
00045 #ifdef HAVE_CONFIG_H
00046 #include <config.h>
00047 #endif
00048 
00049 #ifdef _MSC_VER
00050 #pragma warning (disable: 4018)
00051 #endif
00052 
00053 #include "sphinxbase/feat.h"
00054 #include "sphinxbase/ckd_alloc.h"
00055 #include "sphinxbase/bio.h"
00056 #include "sphinxbase/err.h"
00057 
00058 #define MATRIX_FILE_VERSION "0.1"
00059 
00060 int32
00061 feat_read_lda(feat_t *feat, const char *ldafile, int32 dim)
00062 {
00063     FILE *fh;
00064     int32 byteswap, chksum_present;
00065     uint32 chksum, i, m, n;
00066     char **argname, **argval;
00067 
00068     assert(feat);
00069     if (feat->n_stream != 1) {
00070         E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n",
00071                 feat->n_stream);
00072         return -1;
00073     }
00074 
00075     if ((fh = fopen(ldafile, "rb")) == NULL) {
00076         E_ERROR_SYSTEM("Failed to open transform file '%s' for reading: %s\n", ldafile, strerror(errno));
00077         return -1;
00078     }
00079 
00080     if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) {
00081         E_ERROR("Failed to read header from transform file '%s'\n", ldafile);
00082         fclose(fh);
00083         return -1;
00084     }
00085 
00086     chksum_present = 0;
00087     for (i = 0; argname[i]; i++) {
00088         if (strcmp(argname[i], "version") == 0) {
00089             if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0)
00090                 E_WARN("%s: Version mismatch: %s, expecting %s\n",
00091                        ldafile, argval[i], MATRIX_FILE_VERSION);
00092         }
00093         else if (strcmp(argname[i], "chksum0") == 0) {
00094             chksum_present = 1; /* Ignore the associated value */
00095         }
00096     }
00097 
00098     bio_hdrarg_free(argname, argval);
00099     argname = argval = NULL;
00100 
00101     chksum = 0;
00102 
00103     if (feat->lda)
00104         ckd_free_3d((void ***)feat->lda);
00105 
00106     {
00107         /* Use a temporary variable to avoid strict-aliasing problems. */
00108         void ***outlda;
00109 
00110         if (bio_fread_3d(&outlda, sizeof(float32),
00111                          &feat->n_lda, &m, &n,
00112                          fh, byteswap, &chksum) < 0) {
00113             E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile);
00114             fclose(fh);
00115             return -1;
00116         }
00117         feat->lda = (void *)outlda;
00118     }
00119     fclose(fh);
00120     
00121 #ifdef FIXED_POINT
00122     /* FIXME: This is a fragile hack that depends on mfcc_t and
00123      * float32 being the same size (which they are, but...) */
00124     for (i = 0; i < feat->n_lda * m * n; ++i) {
00125         feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]);
00126     }
00127 #endif
00128 
00129     /* Note that SphinxTrain stores the eigenvectors as row vectors. */
00130     if (n != feat->stream_len[0])
00131         E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]);
00132     
00133     /* Override dim from file if it is 0 or greater than m. */
00134     if (dim > m || dim <= 0) {
00135         dim = m;
00136     }
00137     feat->out_dim = dim;
00138 
00139     return 0;
00140 }
00141 
00142 void
00143 feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr)
00144 {
00145     mfcc_t *tmp;
00146     uint32 i, j, k;
00147 
00148     tmp = ckd_calloc(fcb->stream_len[0], sizeof(mfcc_t));
00149     for (i = 0; i < nfr; ++i) {
00150         /* Do the matrix multiplication inline here since fcb->lda
00151          * is transposed (eigenvectors in rows not columns). */
00152         /* FIXME: In the future we ought to use the BLAS. */
00153         memset(tmp, 0, sizeof(mfcc_t) * fcb->stream_len[0]);
00154         for (j = 0; j < feat_dimension(fcb); ++j) {
00155             for (k = 0; k < fcb->stream_len[0]; ++k) {
00156                 tmp[j] += MFCCMUL(inout_feat[i][0][k], fcb->lda[0][j][k]);
00157             }
00158         }
00159         memcpy(inout_feat[i][0], tmp, fcb->stream_len[0] * sizeof(mfcc_t));
00160     }
00161     ckd_free(tmp);
00162 }