• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/libsphinxbase/lm/ngram_model_internal.h

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2007 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 /*
00038  * \file ngram_model_internal.h Internal structures for N-Gram models
00039  *
00040  * Author: David Huggins-Daines <dhuggins@cs.cmu.edu>
00041  */
00042 
00043 #ifndef __NGRAM_MODEL_INTERNAL_H__
00044 #define __NGRAM_MODEL_INTERNAL_H__
00045 
00046 #include "ngram_model.h"
00047 #include "hash_table.h"
00048 
/*
 * struct ngram_model_s: per-model state, plus a pointer (`funcs`) to the
 * table of implementation callbacks (struct ngram_funcs_s, declared later
 * in this header).
 *
 * NOTE(review): the per-member notes below are inferred from member names
 * and types only. The embedded listing numbers skip inside this struct,
 * which suggests the original member documentation is not part of this
 * rendering -- confirm each note against the original header.
 */
00055 struct ngram_model_s {
00056     int refcount;       /* presumably a reference count -- confirm */
00057     int32 *n_counts;    /* presumably per-order N-gram counts -- TODO confirm length/indexing */
00058     int32 n_1g_alloc;   /* presumably number of unigram/word slots allocated -- confirm */
00059     int32 n_words;      /* presumably number of word strings in use -- confirm */
00061     uint8 n;            /* presumably the model order (the N in N-gram); cf. `n` of ngram_model_init() */
00062     uint8 n_classes;    /* presumably number of entries reachable through `classes` -- confirm */
00063     uint8 writable;     /* flag; exact meaning not visible in this header */
00064     uint8 flags;        /* flag bits; values are not defined in this header */
00066     logmath_t *lmath;   /* log-math object (same type as the `lmath` argument of ngram_model_init()) */
00067     float32 lw;         /* shares its name with the `lw` argument of apply_weights -- presumably the language weight */
00068     int32 log_wip;      /* presumably log-domain form of the `wip` weight -- confirm */
00069     int32 log_uw;       /* presumably log-domain form of the `uw` weight -- confirm */
00070     int32 log_uniform;  /* presumably log of a uniform probability -- confirm */
00071     int32 log_uniform_weight; /* presumably log of the weight given to the uniform distribution -- confirm */
00072     int32 log_zero;     /* presumably the log-domain value used for zero probability -- confirm */
00073     char **word_str;    /* pointer to string pointers; presumably word strings indexed by word ID -- confirm */
00074     hash_table_t *wid;  /* hash table; presumably maps word strings to word IDs -- confirm */
00075     struct ngram_class_s **classes; /* pointer(s) to struct ngram_class_s; presumably n_classes entries -- confirm */
00076     struct ngram_funcs_s *funcs;   /* callback table for this model (see struct ngram_funcs_s) */
00077 };
00078 
/*
 * struct ngram_class_s: state for one word class. Holds two word-ID members
 * (tag_wid, start_wid -- the same names as the arguments of
 * ngram_class_new()), an int32 array `prob1`, and a separate table of
 * struct ngram_hash_s entries (`nword_hash`).
 *
 * NOTE(review): member meanings below are inferred from names and types;
 * the original member documentation is not visible in this listing.
 */
00082 struct ngram_class_s {
00083     int32 tag_wid;  /* presumably the word ID of the class tag itself -- confirm */
00084     int32 start_wid; /* presumably the first word ID belonging to this class -- confirm */
00085     int32 n_words;   /* presumably the number of entries in `prob1` -- confirm */
00086     int32 *prob1;    /* presumably per-word probabilities, one per class word -- TODO confirm domain/indexing */
          /*
           * Entry type for `nword_hash`. Each entry carries a word ID, a
           * value named like the `prob1` array above, and an int32 `next`.
           * NOTE(review): `next` looks like an index link to another entry
           * (collision chaining) -- confirm, including its end-of-chain value.
           */
00090     struct ngram_hash_s {
00091         int32 wid;    /* word ID stored in this entry */
00092         int32 prob1;  /* presumably the probability for `wid` -- confirm */
00093         int32 next;   /* presumably index of the next entry in the chain -- confirm */
00094     } *nword_hash;
00095     int32 n_hash;       /* presumably the number of entries allocated in `nword_hash` -- confirm */
00096     int32 n_hash_inuse; /* presumably the number of `nword_hash` entries in use -- confirm */
00097 };
00098 
/*
 * NOTE(review): no use of this constant is visible in this header;
 * presumably a size for the `nword_hash` table of struct ngram_class_s --
 * confirm against the implementation.
 */
00099 #define NGRAM_HASH_SIZE 128
00100 
/*
 * Word-ID packing helpers. Taken together, the masks and shifts below
 * imply this layout for a 32-bit word ID:
 *   bit 31      set when the ID was built by NGRAM_CLASSWID
 *   bits 24-30  class ID (7 bits, mask 0x7f)
 *   bits 0-23   base word ID (mask 0xffffff)
 *
 * NGRAM_BASEWID(wid)            keeps only bits 0-23 of `wid`.
 * NGRAM_CLASSID(wid)            shifts `wid` right by 24 and keeps 7 bits.
 * NGRAM_CLASSWID(wid, classid)  ORs together (classid << 24), bit 31 and
 *                               `wid`; neither argument is masked first, so
 *                               out-of-range values spill into other fields.
 * NGRAM_IS_CLASSWID(wid)        yields the masked bit 31 (non-zero when set,
 *                               not normalised to 1).
 *
 * Every macro evaluates its arguments exactly once.
 */
00101 #define NGRAM_BASEWID(wid) ((wid)&0xffffff)
00102 #define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
00103 #define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
00104 #define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
00105 
/*
 * NOTE(review): no use of this constant is visible in this header;
 * presumably the growth increment for unigram storage (cf. the n_1g_alloc
 * member of struct ngram_model_s) -- confirm against the implementation.
 */
00106 #define UG_ALLOC_STEP 10
00107 
/*
 * ngram_funcs_t: table of function pointers supplied by a model
 * implementation. Every callback takes the model as its first argument.
 * struct ngram_model_s stores a pointer to such a table in its `funcs`
 * member, and ngram_model_init() takes one as an argument.
 *
 * NOTE(review): the per-callback notes are inferred from names and
 * signatures; return-value conventions are not visible in this listing.
 */
00109 typedef struct ngram_funcs_s {
          /* Presumably releases implementation-specific resources held by `model` -- confirm. */
00113     void (*free)(ngram_model_t *model);
          /*
           * Takes three float32 weights (lw, wip, uw) and returns int.
           * Presumably language weight, word insertion penalty and unigram
           * weight -- TODO confirm meaning and return convention.
           */
00117     int (*apply_weights)(ngram_model_t *model,
00118                          float32 lw,
00119                          float32 wip,
00120                          float32 uw);
          /*
           * Returns an int32 for word `wid` given `n_hist` history word IDs
           * in `history`; `n_used` is an int32 pointer, presumably an output
           * reporting how much of the history was used -- confirm.
           */
00124     int32 (*score)(ngram_model_t *model,
00125                    int32 wid,
00126                    int32 *history,
00127                    int32 n_hist,
00128                    int32 *n_used);
          /*
           * Same signature as `score`.
           * NOTE(review): presumably the score without the weights set via
           * apply_weights -- confirm.
           */
00133     int32 (*raw_score)(ngram_model_t *model,
00134                        int32 wid,
00135                        int32 *history,
00136                        int32 n_hist,
00137                        int32 *n_used);
          /*
           * Presumably adds a unigram for `wid` with weight `lweight`;
           * meaning of the int32 result is not visible here -- confirm.
           */
00149     int32 (*add_ug)(ngram_model_t *model,
00150                     int32 wid, int32 lweight);
          /* Presumably discards cached/derived state in `model` -- confirm. */
00154     void (*flush)(ngram_model_t *model);
00155 } ngram_funcs_t;
00156 
/*
 * classdef_t: a word-class definition made of a string array, a float32
 * array and a count. Instances are passed to classdef_free().
 *
 * NOTE(review): `words` and `weights` look like parallel arrays of
 * `n_words` entries each -- confirm, and confirm who owns the strings.
 */
00160 typedef struct classdef_s {
00161     char **words;
00162     float32 *weights;
00163     int32 n_words;
00164 } classdef_t;
00165 
/*
 * Initialiser for the base model structure. Takes the model, its callback
 * table, a log-math object, and two int32 values `n` and `n_unigram`.
 *
 * NOTE(review): `n` is presumably the model order and `n_unigram` the
 * initial unigram count; the int32 return convention is not visible in this
 * header -- confirm against the implementation.
 */
00169 int32
00170 ngram_model_init(ngram_model_t *model,
00171                  ngram_funcs_t *funcs,
00172                  logmath_t *lmath,
00173                  int32 n, int32 n_unigram);
00174 
/*
 * Format-specific model readers. All three share one signature: a
 * configuration object, a file name and a log-math object in, a model
 * pointer out.
 *
 * NOTE(review): the names suggest the ARPA text format and two variants of
 * a binary "DMP" format; the failure return (presumably NULL) and the
 * ownership of `lmath` are not visible in this header -- confirm.
 */
00178 ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config,
00179                                      const char *file_name,
00180                                      logmath_t *lmath);
00184 ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config,
00185                                     const char *file_name,
00186                                     logmath_t *lmath);
00190 ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config,
00191                                      const char *file_name,
00192                                      logmath_t *lmath);
00193 
/*
 * Format-specific model writers, one per reader (arpa, dmp, dmp32). Each
 * takes a model and a destination file name and returns int.
 *
 * NOTE(review): the return convention (presumably 0 on success, negative on
 * error) is not visible in this header -- confirm.
 */
00197 int ngram_model_arpa_write(ngram_model_t *model,
00198                            const char *file_name);
00202 int ngram_model_dmp_write(ngram_model_t *model,
00203                           const char *file_name);
00207 int ngram_model_dmp32_write(ngram_model_t *model,
00208                             const char *file_name);
00209 
/*
 * Takes a hash table and the name of a class definition file.
 * NOTE(review): presumably fills `classes` with classdef_t entries read
 * from the file; the key type and the int32 return convention are not
 * visible in this header -- confirm.
 */
00213 int32 read_classdef_file(hash_table_t *classes, const char *classdef_file);
00214 
/*
 * Takes a classdef_t pointer and returns nothing.
 * NOTE(review): presumably releases the definition and its arrays --
 * confirm what exactly is freed.
 */
00218 void classdef_free(classdef_t *classdef);
00219 
/*
 * Constructor-style function returning an ngram_class_t pointer. The
 * `tag_wid` and `start_wid` arguments share their names with members of
 * struct ngram_class_s; `classwords` is a glist_t.
 *
 * NOTE(review): the element type of `classwords` and the failure return
 * are not visible in this header -- confirm.
 */
00223 ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid,
00224                                int32 start_wid, glist_t classwords);
00225 
/*
 * Takes an ngram_class_t pointer and returns nothing.
 * NOTE(review): presumably the counterpart of ngram_class_new() -- confirm.
 */
00229 void ngram_class_free(ngram_class_t *lmclass);
00230 
/*
 * Returns an int32 for word ID `wid` within class `lmclass`.
 * NOTE(review): presumably the in-class probability of `wid`; the value
 * returned when `wid` is not a member of the class is not visible in this
 * header -- confirm.
 */
00236 int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid);
00237 
00238 #endif /* __NGRAM_MODEL_INTERNAL_H__ */

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1