• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/lm/lm3g_model.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * \file lm3g_model.h Core Sphinx 3-gram code used in
00039  * DMP/DMP32/ARPA (for now) model code.
00040  *
00041  * Author: A cast of thousands, probably.
00042  */
00043 
00044 #ifndef __NGRAM_MODEL_LM3G_H__
00045 #define __NGRAM_MODEL_LM3G_H__
00046 
00047 #include <listelem_alloc.h>
00048 
00049 #include "ngram_model_internal.h"
00050 
00054 typedef union {     /* Overlays a float32 and an int32 in the same storage. */
00055     float32 f;      /* The value read as a float32. */
00056     int32 l;        /* The same storage read as an int32. */
00057 } lmprob_t;         /* NOTE(review): which member is valid at a given time is not shown in this header -- confirm against the code that fills it. */
00058 
00062 typedef struct unigram_s {  /* Per-unigram record: two lmprob_t values plus one int32. */
00063     lmprob_t prob1;     /* presumably the unigram probability -- TODO confirm */
00064     lmprob_t bo_wt1;    /* presumably the unigram backoff weight -- TODO confirm */
00065     int32 bigrams;      /* NOTE(review): looks like the index of this word's first bigram entry -- confirm */
00066 } unigram_t;
00067 
00071 typedef struct bigram_s bigram_t;   /* Forward declaration only: struct bigram_s is not defined in the visible part of this header. */
00075 typedef struct trigram_s trigram_t; /* Forward declaration only: struct trigram_s is not defined in the visible part of this header. */
00076 
00077 
00078 /*
00079  * To conserve space, bigram info is kept in many tables.  Since the number
00080  * of distinct values << #bigrams, these table indices can be 16-bit values.
00081  * prob2 and bo_wt2 are such indices, but keeping trigram index is less easy.
00082  * It is supposed to be the index of the first trigram entry for each bigram.
00083  * But such an index cannot be represented in 16-bits, hence the following
00084  * segmentation scheme: Partition bigrams into segments of BG_SEG_SZ
00085  * consecutive entries, such that #trigrams in each segment <= 2**16 (the
00086  * corresponding trigram segment).  The bigram_t.trigrams value is then a
00087  * 16-bit relative index within the trigram segment.  A separate table--
00088  * lm3g_model_t.tseg_base--has the index of the 1st trigram for each bigram segment.
00089  */
00090 #define BG_SEG_SZ       512     /* chosen so that #trigram/segment <= 2**16 */
00091 #define LOG_BG_SEG_SZ   9       /* log2(BG_SEG_SZ): 2**9 == 512, so the two macros must be changed together */
00092 
00100 typedef struct tginfo_s {   /* Singly linked node (see `next`) holding trigram information. */
00101     int32 w1;                   /* presumably the first word id of the bigram (w1,w2) this node describes -- TODO confirm */
00103     int32 n_tg;                 /* presumably the number of trigram entries reachable through `tg` -- TODO confirm */
00104     int32 bowt;                 /* presumably a backoff weight associated with this node -- TODO confirm */
00105     int32 used;                 /* NOTE(review): looks like a "used recently" flag -- confirm semantics in the implementation */
00106     trigram_t *tg;              /* Pointer to trigram_t data; ownership is not shown in this header. */
00107     struct tginfo_s *next;      /* Link to the next tginfo_s node. */
00108 } tginfo_t;
00109 
00113 typedef struct lm3g_model_s {  /* Groups the unigram/bigram/trigram arrays and the lookup tables declared in this header. */
00114     unigram_t *unigrams;     /* Pointer to unigram_t records. */
00115     bigram_t *bigrams;       /* Pointer to bigram_t records (type only forward-declared here). */
00116     trigram_t *trigrams;     /* Pointer to trigram_t records (type only forward-declared here). */
00117     lmprob_t *prob2;         /* presumably the table of distinct bigram probabilities that bigram entries index into -- TODO confirm */
00118     int32 n_prob2;           /* presumably the number of entries in prob2 -- TODO confirm */
00119     lmprob_t *bo_wt2;        /* presumably the table of distinct bigram backoff weights that bigram entries index into -- TODO confirm */
00120     int32 n_bo_wt2;          /* presumably the number of entries in bo_wt2 -- TODO confirm */
00121     lmprob_t *prob3;         /* presumably the table of distinct trigram probabilities -- TODO confirm */
00122     int32 n_prob3;           /* presumably the number of entries in prob3 -- TODO confirm */
00123     int32 *tseg_base;    /* Per the segmentation comment in this header: index of the 1st trigram for each bigram segment. */
00125     tginfo_t **tginfo;   /* NOTE(review): looks like an array of tginfo_t list heads; the indexing scheme is not shown here -- confirm */
00127     listelem_alloc_t *le; /* listelem_alloc_t handle; presumably the allocator for tginfo_t nodes -- TODO confirm */
00128 } lm3g_model_t;
00129 
00130 void lm3g_tginfo_free(ngram_model_t *base, lm3g_model_t *lm3g);   /* presumably releases the tginfo data held by lm3g -- TODO confirm in the implementation */
00131 void lm3g_tginfo_reset(ngram_model_t *base, lm3g_model_t *lm3g);  /* presumably clears/resets the tginfo data held by lm3g -- TODO confirm in the implementation */
00132 void lm3g_apply_weights(ngram_model_t *base,
00133                         lm3g_model_t *lm3g,
00134                         float32 lw, float32 wip, float32 uw);  /* NOTE(review): lw/wip/uw look like language weight, word insertion penalty and unigram weight -- confirm */
00135 int32 lm3g_add_ug(ngram_model_t *base,
00136                   lm3g_model_t *lm3g, int32 wid, int32 lweight);  /* presumably adds a unigram for word id `wid` with weight `lweight`; the meaning of the int32 result is not shown here -- TODO confirm */
00137 
00138 
00139 #endif /* __NGRAM_MODEL_LM3G_H__ */

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1