00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 #ifndef __PS_CMDLN_MACRO_H__
00041 #define __PS_CMDLN_MACRO_H__
00042
00043 #include <cmd_ln.h>
00044 #include <feat.h>
00045 #include <fe.h>
00046
/**
 * Complete PocketSphinx command-line option list.
 *
 * Expands to a comma-separated sequence made of the two front-end option
 * macros followed by every POCKETSPHINX_*_OPTIONS group defined in this
 * header, in the order listed below.  There is no trailing comma after the
 * last group, so a terminating entry (see CMDLN_EMPTY_OPTION) can be
 * appended by the user of the macro.
 *
 * NOTE(review): waveform_to_cepstral_command_line_macro() and
 * cepstral_to_feature_command_line_macro() are not defined in this header;
 * presumably they come from <fe.h> and <feat.h> -- confirm.
 */
#define POCKETSPHINX_OPTIONS \
    waveform_to_cepstral_command_line_macro(), \
    cepstral_to_feature_command_line_macro(), \
    POCKETSPHINX_ACMOD_OPTIONS, \
    POCKETSPHINX_BEAM_OPTIONS, \
    POCKETSPHINX_SEARCH_OPTIONS, \
    POCKETSPHINX_DICT_OPTIONS, \
    POCKETSPHINX_NGRAM_OPTIONS, \
    POCKETSPHINX_FSG_OPTIONS, \
    POCKETSPHINX_DEBUG_OPTIONS
00058
/**
 * Options for logging and debug output.
 *
 * Expands to three brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  All three options are
 * strings with a NULL default.
 *
 * NOTE(review): the entries are presumably initializers for the option
 * descriptor type declared in <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_DEBUG_OPTIONS \
    { "-logfn", ARG_STRING, NULL, \
      "File to write log messages in" }, \
    { "-mfclogdir", ARG_STRING, NULL, \
      "Directory to log feature files to" }, \
    { "-rawlogdir", ARG_STRING, NULL, \
      "Directory to log raw audio files to" }
00075
/**
 * Options defining the beam widths used by the search passes.
 *
 * Expands to seven brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  Every option is
 * ARG_FLOAT64 and its default is given as a string.  As the "-beam"
 * help text states, smaller values mean a wider beam.
 */
#define POCKETSPHINX_BEAM_OPTIONS \
    { "-beam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \
    { "-wbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits" }, \
    { "-pbeam", ARG_FLOAT64, "1e-48", \
      "Beam width applied to phone transitions" }, \
    { "-lpbeam", ARG_FLOAT64, "1e-40", \
      "Beam width applied to last phone in words" }, \
    { "-lponlybeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to last phone in single-phone words" }, \
    { "-fwdflatbeam", ARG_FLOAT64, "1e-64", \
      "Beam width applied to every frame in second-pass flat search" }, \
    { "-fwdflatwbeam", ARG_FLOAT64, "7e-29", \
      "Beam width applied to word exits in second-pass flat search" }
00106
/**
 * Options selecting and tuning the search passes (other than beams).
 *
 * Expands to eleven brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  The first five are
 * boolean switches ("yes"/"no" defaults); the remaining six are 32-bit
 * integer limits and window sizes.  For "-maxwpf" and "-maxhmmpf" the
 * default "-1" means no pruning, per their help text.  "-maxhistpf" is
 * described as UNUSED by its own help text.
 */
#define POCKETSPHINX_SEARCH_OPTIONS \
    { "-compallsen", ARG_BOOLEAN, "no", \
      "Compute all senone scores in every frame (can be faster when there are many senones)" }, \
    { "-fwdtree", ARG_BOOLEAN, "yes", \
      "Run forward lexicon-tree search (1st pass)" }, \
    { "-fwdflat", ARG_BOOLEAN, "yes", \
      "Run forward flat-lexicon search over word lattice (2nd pass)" }, \
    { "-bestpath", ARG_BOOLEAN, "yes", \
      "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \
    { "-backtrace", ARG_BOOLEAN, "no", \
      "Print results and backtraces to log file." }, \
    { "-latsize", ARG_INT32, "5000", \
      "Initial backpointer table size" }, \
    { "-maxwpf", ARG_INT32, "-1", \
      "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \
    { "-maxhmmpf", ARG_INT32, "-1", \
      "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \
    { "-maxhistpf", ARG_INT32, "100", \
      "Max no. of histories to maintain at each frame (UNUSED)" }, \
    { "-fwdflatefwid", ARG_INT32, "4", \
      "Minimum number of end frames for a word to be searched in fwdflat search" }, \
    { "-fwdflatsfwin", ARG_INT32, "25", \
      "Window of frames in lattice to search for successor words in fwdflat search " }
00153
/**
 * Options for finite state grammar (FSG / JSGF) decoding.
 *
 * Expands to five brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  The three file/rule
 * options are strings with a NULL default; the two "-fsguse..." switches
 * are booleans that default to "yes".
 */
#define POCKETSPHINX_FSG_OPTIONS \
    { "-fsg", ARG_STRING, NULL, \
      "Sphinx format finite state grammar file"}, \
    { "-jsgf", ARG_STRING, NULL, \
      "JSGF grammar file" }, \
    { "-toprule", ARG_STRING, NULL, \
      "Start rule for JSGF (first public rule is default)" }, \
    { "-fsgusealtpron", ARG_BOOLEAN, "yes", \
      "Add alternate pronunciations to FSG"}, \
    { "-fsgusefiller", ARG_BOOLEAN, "yes", \
      "Insert filler words at each state."}
00176
/**
 * Options for statistical (N-gram) language model decoding.
 *
 * Expands to thirteen brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  The first three select
 * the language model (string options); the remaining ten are 32-bit
 * float weights, penalties and probabilities with string defaults.
 *
 * NOTE(review): the "-lmctl" help text ends with an embedded newline,
 * unlike every other help string here; it is kept unchanged -- confirm
 * whether that is intentional.
 */
#define POCKETSPHINX_NGRAM_OPTIONS \
    { "-lm", ARG_STRING, NULL, \
      "Word trigram language model input file" }, \
    { "-lmctl", ARG_STRING, NULL, \
      "Specify a set of language model\n"}, \
    { "-lmname", ARG_STRING, "default", \
      "Which language model in -lmctl to use by default"}, \
    { "-lw", ARG_FLOAT32, "6.5", \
      "Language model probability weight" }, \
    { "-fwdflatlw", ARG_FLOAT32, "8.5", \
      "Language model probability weight for flat lexicon (2nd pass) decoding" }, \
    { "-bestpathlw", ARG_FLOAT32, "9.5", \
      "Language model probability weight for bestpath search" }, \
    { "-ascale", ARG_FLOAT32, "20.0", \
      "Inverse of acoustic model scale for confidence score calculation" }, \
    { "-wip", ARG_FLOAT32, "0.65", \
      "Word insertion penalty" }, \
    { "-nwpen", ARG_FLOAT32, "1.0", \
      "New word transition penalty" }, \
    { "-pip", ARG_FLOAT32, "1.0", \
      "Phone insertion penalty" }, \
    { "-uw", ARG_FLOAT32, "1.0", \
      "Unigram weight" }, \
    { "-silprob", ARG_FLOAT32, "0.005", \
      "Silence word transition probability" }, \
    { "-fillprob", ARG_FLOAT32, "1e-8", \
      "Filler word transition probability" }
00231
/**
 * Options for pronunciation dictionaries.
 *
 * Expands to five brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  "-dict" is the only
 * entry in this header declared with REQARG_STRING rather than
 * ARG_STRING.
 *
 * NOTE(review): REQARG_STRING presumably marks the option as mandatory
 * in <cmd_ln.h> -- confirm.
 */
#define POCKETSPHINX_DICT_OPTIONS \
    { "-dict", REQARG_STRING, NULL, \
      "Main pronunciation dictionary (lexicon) input file" }, \
    { "-fdict", ARG_STRING, NULL, \
      "Noise word pronunciation dictionary input file" }, \
    { "-dictcase", ARG_BOOLEAN, "no", \
      "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" }, \
    { "-maxnewoov", ARG_INT32, "20", \
      "Maximum new OOVs that can be added at run time" }, \
    { "-usewdphones", ARG_BOOLEAN, "no", \
      "Use within-word phones only" }
00254
/**
 * Options for acoustic model loading and scoring.
 *
 * Expands to nineteen brace-enclosed entries of the form
 * { option name, argument type, default value string, help text },
 * separated by commas with no trailing comma.  Model file and directory
 * options are strings with a NULL default; floors and the log base are
 * 32-bit floats; "-mmap" is a boolean; the remaining tuning knobs are
 * 32-bit integers.
 */
#define POCKETSPHINX_ACMOD_OPTIONS \
    { "-hmm", ARG_STRING, NULL, \
      "Directory containing acoustic model files."}, \
    { "-featparams", ARG_STRING, NULL, \
      "File containing feature extraction parameters."}, \
    { "-mdef", ARG_STRING, NULL, \
      "Model definition input file" }, \
    { "-tmat", ARG_STRING, NULL, \
      "HMM state transition matrix input file" }, \
    { "-tmatfloor", ARG_FLOAT32, "0.0001", \
      "HMM state transition probability floor (applied to -tmat file)" }, \
    { "-mean", ARG_STRING, NULL, \
      "Mixture gaussian means input file" }, \
    { "-var", ARG_STRING, NULL, \
      "Mixture gaussian variances input file" }, \
    { "-varfloor", ARG_FLOAT32, "0.0001", \
      "Mixture gaussian variance floor (applied to data from -var file)" }, \
    { "-mixw", ARG_STRING, NULL, \
      "Senone mixture weights input file (uncompressed)" }, \
    { "-mixwfloor", ARG_FLOAT32, "0.0000001", \
      "Senone mixture weights floor (applied to data from -mixw file)" }, \
    { "-sendump", ARG_STRING, NULL, \
      "Senone dump (compressed mixture weights) input file" }, \
    { "-sdmap", ARG_STRING, NULL, \
      "Subspace distribution cluster map file" }, \
    { "-mmap", ARG_BOOLEAN, "yes", \
      "Use memory-mapped I/O (if possible) for model files" }, \
    { "-ds", ARG_INT32, "1", \
      "Frame GMM computation downsampling ratio" }, \
    { "-topn", ARG_INT32, "4", \
      "Number of top Gaussians to use in scoring" }, \
    { "-kdtree", ARG_STRING, NULL, \
      "kd-Tree file for Gaussian selection" }, \
    { "-kdmaxdepth", ARG_INT32, "0", \
      "Maximum depth of kd-Trees to use" }, \
    { "-kdmaxbbi", ARG_INT32, "-1", \
      "Maximum number of Gaussians per leaf node in kd-Trees" }, \
    { "-logbase", ARG_FLOAT32, "1.0001", \
      "Base in which all log-likelihoods calculated" }
00333
/**
 * All-empty option entry: NULL name, type 0, NULL default, NULL help text.
 *
 * NOTE(review): presumably used as the terminating element of an option
 * table built from the macros in this header -- confirm against callers.
 */
#define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL }
00335
00336 #endif