SphinxBase 0.6

src/sphinx_fe/cmd_ln_defn.h

00001 /* ====================================================================
00002  * Copyright (c) 1998-2000 Carnegie Mellon University.  All rights 
00003  * reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  *    notice, this list of conditions and the following disclaimer. 
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  *    notice, this list of conditions and the following disclaimer in
00014  *    the documentation and/or other materials provided with the
00015  *    distribution.
00016  *
00017  * This work was supported in part by funding from the Defense Advanced 
00018  * Research Projects Agency and the National Science Foundation of the 
00019  * United States of America, and the CMU Sphinx Speech Consortium.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00022  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00023  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00024  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00025  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00026  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00027  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00028  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00029  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00030  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00031  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * ====================================================================
00034  *
00035  */
00036 /*********************************************************************
00037  *
00038  * File: cmd_ln_defn.h
00039  * 
00040  * Description: 
00041  *      Command line argument definition
00042  *
00043  * Author: 
00044  *      
00045  *********************************************************************/
00046 
00047 #ifndef CMD_LN_DEFN_H
00048 #define CMD_LN_DEFN_H
00049 
00050 #include <sphinxbase/cmd_ln.h>
00051 #include <sphinxbase/fe.h>
00052 
/*
 * Description text for the tool: a one-line summary followed by the
 * parameters that most affect the output, with typical values.
 * Presumably printed together with the usage when -help is given; the
 * consumer lives outside this header.
 *
 * Written as adjacent string literals (one per output line) rather than
 * backslash-newline continuations, so source indentation can never leak
 * into the text.  The text ends with a newline and nothing else.
 */
const char helpstr[] =
    "Description: \n"
    "Extract acoustic features from audio file.\n"
    "\n"
    "The main parameters that affect the final output, with typical values, are:\n"
    "\n"
    "samprate, typically 8000, 11025, or 16000\n"
    "lowerf, 130, 200, 130, for the respective sampling rates above\n"
    "upperf, 3700, 5200, 6800, for the respective sampling rates above\n"
    "nfilt, 31, 37, 40, for the respective sampling rates above\n"
    "nfft, 256 or 512\n"
    "format, raw or nist or mswav\n";
00066 
/*
 * Worked example of a sphinx_fe invocation: converts a headerless 16kHz
 * raw file into a cepstral file.  One string literal per output line;
 * the final line deliberately carries no trailing newline.
 */
const char examplestr[] =
    "Example: \n"
    "This example creates a cepstral file named \"output.mfc\" from an input audio file named \"input.raw\", which is a raw audio file (no header information), which was originally sampled at 16kHz. \n"
    "\n"
    "sphinx_fe -i  input.raw \n"
    "        -o   output.mfc \n"
    "        -input_endian little \n"
    "        -samprate  16000 \n"
    "        -lowerf    130 \n"
    "        -upperf    6800 \n"
    "        -nfilt     40 \n"
    "        -nfft      512";
00079 
00080 static arg_t defn[] = {
00081   { "-help",
00082     ARG_BOOLEAN,
00083     "no",
00084     "Shows the usage of the tool"},
00085   
00086   { "-example",
00087     ARG_BOOLEAN,
00088     "no",
00089     "Shows example of how to use the tool"},
00090 
00091   waveform_to_cepstral_command_line_macro(),
00092 
00093   { "-argfile",
00094     ARG_STRING,
00095     NULL,
00096     "Argument file (e.g. feat.params from an acoustic model) to read parameters from.  This will override anything set in other command line arguments." },
00097   
00098   { "-i",
00099     ARG_STRING,
00100     NULL,
00101     "Single audio input file" },
00102   
00103   { "-o",
00104     ARG_STRING,
00105     NULL,
00106     "Single cepstral output file" },
00107   
00108   { "-c",
00109     ARG_STRING,
00110     NULL,
00111     "Control file for batch processing" },
00112   
00113   { "-nskip",
00114     ARG_INT32,
00115     "0",
00116     "If a control file was specified, the number of utterances to skip at the head of the file" },
00117   
00118   { "-runlen",
00119     ARG_INT32,
00120     "-1",
00121     "If a control file was specified, the number of utterances to process, or -1 for all" },
00122 
00123   { "-part",
00124     ARG_INT32,
00125     "0",
00126     "Index of the part to run (supersedes -nskip and -runlen if non-zero)" },
00127   
00128   { "-npart",
00129     ARG_INT32,
00130     "0",
00131     "Number of parts to run in (supersedes -nskip and -runlen if non-zero)" },
00132   
00133   { "-di",
00134     ARG_STRING,
00135     NULL,
00136     "Input directory, input file names are relative to this, if defined" },
00137   
00138   { "-ei",
00139     ARG_STRING,
00140     NULL,
00141     "Input extension to be applied to all input files" },
00142   
00143   { "-do",
00144     ARG_STRING,
00145     NULL,
00146     "Output directory, output files are relative to this" },
00147   
00148   { "-eo",
00149     ARG_STRING,
00150     NULL,
00151     "Output extension to be applied to all output files" },
00152   
00153   { "-build_outdirs",
00154     ARG_BOOLEAN,
00155     "yes",
00156     "Create missing subdirectories in output directory" },
00157 
00158   { "-sph2pipe",
00159     ARG_BOOLEAN,
00160     "no",
00161     "Input is NIST sphere (possibly with Shorten), use sph2pipe to convert" },
00162 
00163   { "-nist",
00164     ARG_BOOLEAN,
00165     "no",
00166     "Defines input format as NIST sphere" },
00167   
00168   { "-raw",
00169     ARG_BOOLEAN,
00170     "no",
00171     "Defines input format as raw binary data" },
00172   
00173   { "-mswav",
00174     ARG_BOOLEAN,
00175     "no",
00176     "Defines input format as Microsoft Wav (RIFF)" },
00177   
00178 #ifdef HAVE_SNDFILE_H
00179   { "-sndfile",
00180     ARG_BOOLEAN,
00181     "no",
00182     "Use libsndfile to read input data" },
00183 #endif
00184   
00185   { "-nchans",
00186     ARG_INT32,
00187     "1",
00188     "Number of channels of data (interlaced samples assumed)" },
00189   
00190   { "-whichchan",
00191     ARG_INT32,
00192     "0",
00193     "Channel to process (numbered from 1), or 0 to mix all channels" },
00194   
00195   { "-ofmt",
00196     ARG_STRING,
00197     "sphinx",
00198     "Format of output files - one of sphinx, htk, text." },
00199   
00200   { "-mach_endian",
00201     ARG_STRING,
00202 #ifdef WORDS_BIGENDIAN
00203     "big",
00204 #else
00205     "little",
00206 #endif
00207     "Endianness of machine, big or little" },
00208   
00209   { "-blocksize",
00210     ARG_INT32,
00211     "2048",
00212     "Number of samples to read at a time." },
00213 
00214   { "-spec2cep",
00215     ARG_BOOLEAN,
00216     "no",
00217     "Input is log spectral files, output is cepstral files" },
00218 
00219   { "-cep2spec",
00220     ARG_BOOLEAN,
00221     "no",
00222     "Input is cepstral files, output is log spectral files" },
00223 
00224   { NULL, 0, NULL, NULL }
00225 };
00226 
00227     
00228 #define CMD_LN_DEFN_H
00229 
00230 #endif /* CMD_LN_DEFN_H */ 
00231 
00232 /*
00233  * Log record.  Maintained by RCS.
00234  *
00235  * $Log: cmd_ln_defn.h,v $
00236  * Revision 1.7  2006/02/25 00:53:48  egouvea
00237  * Added the flag "-seed". If dither is being used and the seed is less
00238  * than zero, the random number generator is initialized with time(). If
00239  * it is at least zero, it's initialized with the provided seed. This way
00240  * we have the benefit of having dither, and the benefit of being
00241  * repeatable.
00242  *
00243  * This is consistent with what sphinx3 does. Well, almost. The random
00244  * number generator is still what the compiler provides.
00245  *
00246  * Also, moved fe_init_params to fe_interface.c, so one can initialize a
00247  * variable of type param_t with meaningful values.
00248  *
00249  * Revision 1.6  2006/02/17 00:31:34  egouvea
00250  * Removed switch -melwarp. Changed the default for window length to
00251  * 0.025625 from 0.256 (so that a window at 16kHz sampling rate has
00252  * exactly 410 samples). Cleaned up include's. Replaced some E_FATAL()
00253  * with E_WARN() and return.
00254  *
00255  * Revision 1.5  2006/02/16 00:18:26  egouvea
00256  * Implemented flexible warping function. The user can specify at run
00257  * time which of several shapes they want to use. Currently implemented
00258  * are an affine function (y = ax + b), an inverse linear (y = a/x) and a
00259  * piecewise linear (y = ax, up to a frequency F, and then it "breaks" so
00260  * Nyquist frequency matches in both scales).
00261  *
00262  * Added two switches, -warp_type and -warp_params. The first specifies
00263  * the type, with valid values:
00264  *
00265  * -inverse or inverse_linear
00266  * -linear or affine
00267  * -piecewise or piecewise_linear
00268  *
00269  * The inverse_linear is the same as implemented by EHT. The -mel_warp
00270  * switch was kept for compatibility (maybe remove it in the
00271  * future?). The code is compatible with EHT's changes: cepstra created
00272  * from code after his changes should be the same as now. Scripts that
00273  * worked with his changes should work now without changes. Tested a few
00274  * cases, same results.
00275  *
00276  * Revision 1.4  2006/02/14 20:56:54  eht
00277  * Implement an argument -melwarp that changes the standard mel-scale
00278  * equation from:
00279  *      M(f) = 2595 * log10( 1 + f/700 )
00280  * to:
00281  *      M(f,w) = 2595 * log10( 1 + f/(700*w))
00282  *
00283  * So, 1.0 means no warp,  w > 1.0 means linear compression w < 1.0 means
00284  * linear expansion.
00285  *
00286  * Implement argument -nskip and -runlen arguments so that a subset of the
00287  * utterances in the control file can be executed.  Allows a simple
00288  * distribution of wave2feat processing over N processors.
00289  *
00290  * Revision 1.3  2005/05/19 21:21:55  egouvea
00291  * Bug #1176394: example bug
00292  *
00293  * Revision 1.2  2004/11/23 04:14:06  egouvea
00294  * Fixed bug in cmd_ln.c in which a wrong boolean argument led into an
00295  * infinite loop, and fixed the help and example strings, getting rid of
00296  * spaces, so that the appearance is better.
00297  *
00298  * Revision 1.1  2004/09/09 17:59:30  egouvea
00299  * Adding missing files to wave2feat
00300  *
00301  *
00302  *
00303  */