• Main Page
  • Related Pages
  • Data Structures
  • Files
  • File List
  • Globals

src/sphinx_fe/wave2feat.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1996-2004 Carnegie Mellon University.  All rights 
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #if !defined(WIN32) || defined(GNUWINCE)
00040 #include <unistd.h>
00041 #include <sys/file.h>
00042 #include <sys/fcntl.h>
00043 #if !defined(O_BINARY)
00044 #define O_BINARY 0
00045 #endif
00046 #endif
00047 #include <string.h>
00048 #include <time.h>
00049 #include <sys/types.h>
00050 #include <sys/stat.h>
00051 #include <fcntl.h>
00052 #include <assert.h>
00053 
00054 #ifdef _WIN32
00055 #pragma warning (disable: 4996 4018)
00056 #endif
00057 
00058 #if defined(WIN32) && !defined(GNUWINCE)
00059 #include <io.h>
00060 #include <errno.h>
00061 #endif
00062 
00063 #ifdef HAVE_CONFIG_H
00064 #include <config.h>
00065 #endif
00066 
00067 #include "fe.h"
00068 #include "strfuncs.h"
00069 #include "cmd_ln.h"
00070 #include "err.h"
00071 #include "ckd_alloc.h"
00072 #include "byteorder.h"
00073 
00074 #include "wave2feat.h"
00075 #include "cmd_ln_defn.h"
00076 
00077 struct globals_s {
00078     cmd_ln_t *config;
00079     int32 nskip;
00080     int32 runlen;
00081     char const *wavfile;
00082     char const *cepfile;
00083     char const *ctlfile;
00084     char const *wavdir;
00085     char const *cepdir;
00086     char const *wavext;
00087     char const *cepext;
00088     int32 input_format;
00089     int32 is_batch;
00090     int32 is_single;
00091     int32 blocksize;
00092     int32 machine_endian;
00093     int32 input_endian;
00094     int32 output_endian;
00095     int32 nchans;
00096     int32 whichchan;
00097     int32 convert;
00098     int32 verbose;
00099     int32 logspec;
00100 };
00101 typedef struct globals_s globals_t;
00102 
00103 globals_t *fe_parse_options(int argc, char **argv);
00104 int32 fe_convert_files(globals_t * P);
00105 int32 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00106                          char **outfilename);
00107 int32 fe_openfiles(globals_t * P, fe_t * FE, char *infile, int32 * fp_in,
00108                    int32 * nsamps, int32 * nframes, int32 * nblocks,
00109                    char *outfile, int32 * fp_out);
00110 int32 fe_readblock_spch(globals_t * P, int32 fp, int32 nsamps,
00111                         int16 * buf);
00112 int32 fe_writeblock_feat(globals_t * P, fe_t * FE, int32 fp, int32 nframes,
00113                          mfcc_t ** feat);
00114 int32 fe_closefiles(int32 fp_in, int32 fp_out);
00115 int32 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile);
00116 
00117 /*       
00118          7-Feb-00 M. Seltzer - wrapper created for new front end -
00119          does blockstyle processing if necessary. If input stream is
00120          greater than DEFAULT_BLOCKSIZE samples (currently 200000)
00121          then it will read and write in DEFAULT_BLOCKSIZE chunks. 
00122          
00123          Had to change fe_process_utt(). Now the 2d feature array
00124          is allocated internally to that function rather than
00125          externally in the wrapper. 
00126          
00127          Added usage display with -help switch for help
00128 
00129          14-Feb-00 M. Seltzer - added NIST header parsing for 
00130          big endian/little endian parsing. kind of a hack.
00131 
00132          changed -wav switch to -nist to avoid future confusion with
00133          MS wav files
00134          
00135          added -mach_endian switch to specify machine's byte format
00136 */
00137 
00138 int32
00139 main(int32 argc, char **argv)
00140 {
00141     globals_t *P;
00142 
00143     P = fe_parse_options(argc, argv);
00144     if (fe_convert_files(P) != FE_SUCCESS) {
00145         E_FATAL("error converting files...exiting\n");
00146     }
00147     free(P);
00148     return (0);
00149 }
00150 
00151 
00152 int32
00153 fe_convert_files(globals_t * P)
00154 {
00155 
00156     fe_t *FE;
00157     char *infile, *outfile, fileroot[MAXCHARS];
00158     FILE *ctlfile;
00159     int16 *spdata = NULL;
00160     int32 splen =
00161         0, total_samps, frames_proc, nframes, nblocks, last_frame;
00162     int32 fp_in, fp_out, last_blocksize = 0, curr_block, total_frames;
00163     mfcc_t **cep = NULL, **last_frame_cep;
00164     int32 return_value;
00165     int32 warn_zero_energy = 0;
00166     int32 process_utt_return_value;
00167 
00168     if ((FE = fe_init_auto_r(P->config)) == NULL) {
00169         E_ERROR("memory alloc failed...exiting\n");
00170         return (FE_MEM_ALLOC_ERROR);
00171     }
00172 
00173     if (P->is_batch) {
00174         int32 nskip = P->nskip;
00175         int32 runlen = P->runlen;
00176 
00177         if ((ctlfile = fopen(P->ctlfile, "r")) == NULL) {
00178             E_ERROR("Unable to open control file %s\n", P->ctlfile);
00179             fe_free(FE);
00180             return (FE_CONTROL_FILE_ERROR);
00181         }
00182         while (fscanf(ctlfile, "%s", fileroot) != EOF) {
00183             if (nskip > 0) {
00184                 --nskip;
00185                 continue;
00186             }
00187             if (runlen > 0) {
00188                 --runlen;
00189             }
00190             else if (runlen == 0) {
00191                 break;
00192             }
00193 
00194             fe_build_filenames(P, fileroot, &infile, &outfile);
00195 
00196             if (P->verbose)
00197                 E_INFO("%s\n", infile);
00198 
00199             if (P->convert) {
00200                 /* Special case for doing various DCTs */
00201                 return_value = fe_convert_with_dct(P, FE, infile, outfile);
00202                 ckd_free(infile);
00203                 ckd_free(outfile);
00204                 infile = outfile = NULL;
00205                 if (return_value != FE_SUCCESS) {
00206                     fe_free(FE);
00207                     return return_value;
00208                 }
00209                 continue;
00210             }
00211             return_value =
00212                 fe_openfiles(P, FE, infile, &fp_in,
00213                              &total_samps, &nframes, &nblocks,
00214                              outfile, &fp_out);
00215             ckd_free(infile);
00216             ckd_free(outfile);
00217             infile = outfile = NULL;
00218             if (return_value != FE_SUCCESS) {
00219                 fe_free(FE);
00220                 return (return_value);
00221             }
00222 
00223             warn_zero_energy = 0;
00224 
00225             if (nblocks * P->blocksize >= total_samps)
00226                 last_blocksize =
00227                     total_samps - (nblocks - 1) * P->blocksize;
00228 
00229             if (!fe_start_utt(FE)) {
00230                 curr_block = 1;
00231                 total_frames = frames_proc = 0;
00232                 /*execute this loop only if there is more than 1 block to
00233                    be processed */
00234                 while (curr_block < nblocks) {
00235                     splen = P->blocksize;
00236                     if ((spdata =
00237                          (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00238                         E_ERROR
00239                             ("Unable to allocate memory block of %d shorts for input speech\n",
00240                              splen);
00241                         fe_free(FE);
00242                         return (FE_MEM_ALLOC_ERROR);
00243                     }
00244                     if (fe_readblock_spch
00245                         (P, fp_in, splen, spdata) != splen) {
00246                         E_ERROR("error reading speech data\n");
00247                         fe_free(FE);
00248                         return (FE_INPUT_FILE_READ_ERROR);
00249                     }
00250                     process_utt_return_value =
00251                         fe_process_utt(FE, spdata,
00252                                        splen, &cep, &frames_proc);
00253                     if (process_utt_return_value != FE_SUCCESS) {
00254                         if (FE_ZERO_ENERGY_ERROR ==
00255                             process_utt_return_value) {
00256                             warn_zero_energy = 1;
00257                         }
00258                         else {
00259                             fe_free(FE);
00260                             return (process_utt_return_value);
00261                         }
00262                     }
00263                     if (frames_proc > 0)
00264                         fe_writeblock_feat(P, FE,
00265                                            fp_out, frames_proc, cep);
00266                     if (cep != NULL) {
00267                         ckd_free_2d((void **) cep);
00268                         cep = NULL;
00269                     }
00270                     curr_block++;
00271                     total_frames += frames_proc;
00272                     free(spdata);
00273                     spdata = NULL;
00274                 }
00275                 /* process last (or only) block */
00276                 free(spdata);
00277                 spdata = NULL;
00278                 splen = last_blocksize;
00279 
00280                 if ((spdata =
00281                      (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00282                     E_ERROR
00283                         ("Unable to allocate memory block of %d shorts for input speech\n",
00284                          splen);
00285                     fe_free(FE);
00286                     return (FE_MEM_ALLOC_ERROR);
00287                 }
00288 
00289                 if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00290                     E_ERROR("error reading speech data\n");
00291                     fe_free(FE);
00292                     return (FE_INPUT_FILE_READ_ERROR);
00293                 }
00294 
00295                 process_utt_return_value =
00296                     fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00297                 if (process_utt_return_value != FE_SUCCESS) {
00298                     if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00299                         warn_zero_energy = 1;
00300                     }
00301                     else {
00302                         fe_free(FE);
00303                         return (process_utt_return_value);
00304                     }
00305                 }
00306                 if (frames_proc > 0)
00307                     fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00308                 if (cep != NULL) {
00309                     ckd_free_2d((void **) cep);
00310                     cep = NULL;
00311                 }
00312                 curr_block++;
00313                 last_frame_cep =
00314                     (mfcc_t **) ckd_calloc_2d(1,
00315                                               fe_get_output_size(FE),
00316                                               sizeof(float32));
00317                 process_utt_return_value =
00318                     fe_end_utt(FE, last_frame_cep[0], &last_frame);
00319                 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00320                     warn_zero_energy = 1;
00321                 }
00322                 else {
00323                     assert(process_utt_return_value == FE_SUCCESS);
00324                 }
00325                 if (last_frame > 0) {
00326                     fe_writeblock_feat(P, FE, fp_out,
00327                                        last_frame, last_frame_cep);
00328                     frames_proc++;
00329                 }
00330                 total_frames += frames_proc;
00331 
00332                 fe_closefiles(fp_in, fp_out);
00333                 free(spdata);
00334                 spdata = NULL;
00335                 if (last_frame_cep != NULL) {
00336                     ckd_free_2d((void **)
00337                                 last_frame_cep);
00338                     last_frame_cep = NULL;
00339                 }
00340                 if (warn_zero_energy) {
00341                     E_WARN
00342                         ("File %s has some frames with zero energy. Consider using dither\n",
00343                          infile);
00344                 }
00345             }
00346             else {
00347                 E_ERROR("fe_start_utt() failed\n");
00348                 return (FE_START_ERROR);
00349             }
00350         }
00351     }
00352     else if (P->is_single) {
00353 
00354         fe_build_filenames(P, fileroot, &infile, &outfile);
00355         if (P->verbose)
00356             printf("%s\n", infile);
00357 
00358         /* Special case for doing various DCTs. */
00359         if (P->convert != WAV2FEAT) {
00360             int rv;
00361 
00362             rv = fe_convert_with_dct(P, FE, infile, outfile);
00363             ckd_free(infile);
00364             ckd_free(outfile);
00365             infile = outfile = NULL;
00366             fe_free(FE);
00367             return rv;
00368         }
00369 
00370         return_value =
00371             fe_openfiles(P, FE, infile, &fp_in, &total_samps,
00372                          &nframes, &nblocks, outfile, &fp_out);
00373         ckd_free(infile);
00374         ckd_free(outfile);
00375         infile = outfile = NULL;
00376         if (return_value != FE_SUCCESS) {
00377             fe_free(FE);
00378             return (return_value);
00379         }
00380 
00381         warn_zero_energy = 0;
00382 
00383         if (nblocks * P->blocksize >= total_samps)
00384             last_blocksize = total_samps - (nblocks - 1) * P->blocksize;
00385 
00386         if (!fe_start_utt(FE)) {
00387             curr_block = 1;
00388             total_frames = frames_proc = 0;
00389             /*execute this loop only if there are more than 1 block to
00390                be processed */
00391             while (curr_block < nblocks) {
00392                 splen = P->blocksize;
00393                 if ((spdata =
00394                      (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00395                     E_ERROR
00396                         ("Unable to allocate memory block of %d shorts for input speech\n",
00397                          splen);
00398                     fe_free(FE);
00399                     return (FE_MEM_ALLOC_ERROR);
00400                 }
00401                 if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00402                     E_ERROR("Error reading speech data\n");
00403                     fe_free(FE);
00404                     return (FE_INPUT_FILE_READ_ERROR);
00405                 }
00406                 process_utt_return_value =
00407                     fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00408                 if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00409                     warn_zero_energy = 1;
00410                 }
00411                 else {
00412                     assert(process_utt_return_value == FE_SUCCESS);
00413                 }
00414                 if (frames_proc > 0)
00415                     fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00416                 if (cep != NULL) {
00417                     ckd_free_2d((void **) cep);
00418                     cep = NULL;
00419                 }
00420                 curr_block++;
00421                 total_frames += frames_proc;
00422                 if (spdata != NULL) {
00423                     free(spdata);
00424                     spdata = NULL;
00425                 }
00426             }
00427             /* process last (or only) block */
00428             if (spdata != NULL) {
00429                 free(spdata);
00430                 spdata = NULL;
00431             }
00432             splen = last_blocksize;
00433             if ((spdata = (int16 *) calloc(splen, sizeof(int16))) == NULL) {
00434                 E_ERROR
00435                     ("Unable to allocate memory block of %d shorts for input speech\n",
00436                      splen);
00437                 fe_free(FE);
00438                 return (FE_MEM_ALLOC_ERROR);
00439             }
00440             if (fe_readblock_spch(P, fp_in, splen, spdata) != splen) {
00441                 E_ERROR("Error reading speech data\n");
00442                 fe_free(FE);
00443                 return (FE_INPUT_FILE_READ_ERROR);
00444             }
00445             process_utt_return_value =
00446                 fe_process_utt(FE, spdata, splen, &cep, &frames_proc);
00447             free(spdata);
00448             spdata = NULL;
00449             if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00450                 warn_zero_energy = 1;
00451             }
00452             else {
00453                 assert(process_utt_return_value == FE_SUCCESS);
00454             }
00455             if (frames_proc > 0)
00456                 fe_writeblock_feat(P, FE, fp_out, frames_proc, cep);
00457             if (cep != NULL) {
00458                 ckd_free_2d((void **) cep);
00459                 cep = NULL;
00460             }
00461 
00462             curr_block++;
00463             last_frame_cep =
00464                 (mfcc_t **) ckd_calloc_2d(1,
00465                                           fe_get_output_size(FE),
00466                                           sizeof(float32));
00467             process_utt_return_value =
00468                 fe_end_utt(FE, last_frame_cep[0], &last_frame);
00469             if (FE_ZERO_ENERGY_ERROR == process_utt_return_value) {
00470                 warn_zero_energy = 1;
00471             }
00472             else {
00473                 assert(process_utt_return_value == FE_SUCCESS);
00474             }
00475             if (last_frame > 0) {
00476                 fe_writeblock_feat(P, FE, fp_out,
00477                                    last_frame, last_frame_cep);
00478                 frames_proc++;
00479             }
00480             total_frames += frames_proc;
00481 
00482             fe_closefiles(fp_in, fp_out);
00483             if (last_frame_cep != NULL) {
00484                 ckd_free_2d((void **) last_frame_cep);
00485                 last_frame_cep = NULL;
00486             }
00487         }
00488         else {
00489             E_ERROR("fe_start_utt() failed\n");
00490             fe_free(FE);
00491             return (FE_START_ERROR);
00492         }
00493 
00494         if (warn_zero_energy) {
00495             E_WARN
00496                 ("File %s has some frames with zero energy. Consider using dither\n",
00497                  infile);
00498         }
00499     }
00500     else {
00501         E_ERROR("Unknown mode - single or batch?\n");
00502         fe_free(FE);
00503         return (FE_UNKNOWN_SINGLE_OR_BATCH);
00504     }
00505 
00506     fe_free(FE);
00507     return (FE_SUCCESS);
00508 }
00509 
00510 void
00511 fe_validate_parameters(globals_t * P)
00512 {
00513 
00514     if ((P->is_batch) && (P->is_single)) {
00515         E_FATAL("You cannot define an input file and a control file\n");
00516     }
00517 
00518     if (P->wavfile == NULL && P->wavdir == NULL) {
00519         E_FATAL("No input file or file directory given\n");
00520     }
00521 
00522     if (P->cepfile == NULL && P->cepdir == NULL) {
00523         E_FATAL("No cepstra file or file directory given\n");
00524     }
00525 
00526     if (P->ctlfile == NULL && P->cepfile == NULL && P->wavfile == NULL) {
00527         E_FATAL("No control file given\n");
00528     }
00529 
00530     if (P->nchans > 1) {
00531         E_INFO("Files have %d channels of data\n", P->nchans);
00532         E_INFO("Will extract features for channel %d\n", P->whichchan);
00533     }
00534 
00535     if (P->whichchan > P->nchans) {
00536         E_FATAL("You cannot select channel %d out of %d\n",
00537                 P->whichchan, P->nchans);
00538     }
00539 
00540     if ((cmd_ln_float32_r(P->config, "-upperf") * 2)
00541         > cmd_ln_float32_r(P->config, "-samprate")) {
00542         E_WARN("Upper frequency higher than Nyquist frequency\n");
00543     }
00544 
00545     if (cmd_ln_boolean_r(P->config, "-doublebw")) {
00546         E_INFO("Will use double bandwidth filters\n");
00547     }
00548 
00549 }
00550 
00551 
00552 globals_t *
00553 fe_parse_options(int32 argc, char **argv)
00554 {
00555     globals_t *P;
00556     int32 format;
00557     char const *endian;
00558 
00559     P = ckd_calloc(1, sizeof(*P));
00560     P->config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE);
00561 
00562     /* Load arguments from a feat.params file if requested. */
00563     if (cmd_ln_str_r(P->config, "-argfile")) {
00564         P->config = cmd_ln_parse_file_r(P->config, defn,
00565                                         cmd_ln_str_r(P->config, "-argfile"),
00566                                         FALSE);
00567     }
00568 
00569     P->nskip = P->runlen = -1;
00570     P->wavfile = cmd_ln_str_r(P->config, "-i");
00571     if (P->wavfile != NULL) {
00572         P->is_single = 1;
00573     }
00574     P->cepfile = cmd_ln_str_r(P->config, "-o");
00575     P->ctlfile = cmd_ln_str_r(P->config, "-c");
00576     if (P->ctlfile != NULL) {
00577         char const *nskip;
00578         char const *runlen;
00579 
00580         P->is_batch = 1;
00581 
00582         nskip = cmd_ln_str_r(P->config, "-nskip");
00583         runlen = cmd_ln_str_r(P->config, "-runlen");
00584         if (nskip != NULL) {
00585             P->nskip = atoi(nskip);
00586         }
00587         if (runlen != NULL) {
00588             P->runlen = atoi(runlen);
00589         }
00590     }
00591     P->wavdir = cmd_ln_str_r(P->config, "-di");
00592     P->cepdir = cmd_ln_str_r(P->config, "-do");
00593     P->wavext = cmd_ln_str_r(P->config, "-ei");
00594     P->cepext = cmd_ln_str_r(P->config, "-eo");
00595     format = cmd_ln_int32_r(P->config, "-raw");
00596     if (format) {
00597         P->input_format = RAW;
00598     }
00599     format = cmd_ln_int32_r(P->config, "-nist");
00600     if (format) {
00601         P->input_format = NIST;
00602     }
00603     format = cmd_ln_int32_r(P->config, "-mswav");
00604     if (format) {
00605         P->input_format = MSWAV;
00606     }
00607 
00608     P->nchans = cmd_ln_int32_r(P->config, "-nchans");
00609     P->whichchan = cmd_ln_int32_r(P->config, "-whichchan");
00610     P->output_endian = BIG;
00611     P->blocksize = cmd_ln_int32_r(P->config, "-blocksize");
00612     endian = cmd_ln_str_r(P->config, "-mach_endian");
00613     if (!strcmp("big", endian)) {
00614         P->machine_endian = BIG;
00615     }
00616     else {
00617         if (!strcmp("little", endian)) {
00618             P->machine_endian = LITTLE;
00619         }
00620         else {
00621             E_FATAL("Machine must be big or little Endian\n");
00622         }
00623     }
00624     endian = cmd_ln_str_r(P->config, "-input_endian");
00625     if (!strcmp("big", endian)) {
00626         P->input_endian = BIG;
00627     }
00628     else {
00629         if (!strcmp("little", endian)) {
00630             P->input_endian = LITTLE;
00631         }
00632         else {
00633             E_FATAL("Input must be big or little Endian\n");
00634         }
00635     }
00636 
00637     if (cmd_ln_boolean_r(P->config, "-logspec")
00638         || cmd_ln_boolean_r(P->config, "-smoothspec"))
00639         P->logspec = TRUE;
00640     if (cmd_ln_boolean_r(P->config, "-spec2cep"))
00641         P->convert = SPEC2CEP;
00642     if (cmd_ln_boolean_r(P->config, "-cep2spec"))
00643         P->convert = CEP2SPEC;
00644 
00645     fe_validate_parameters(P);
00646 
00647     return (P);
00648 
00649 }
00650 
00651 int32
00652 fe_build_filenames(globals_t * P, char *fileroot, char **infilename,
00653                    char **outfilename)
00654 {
00655     char chanlabel[32];
00656 
00657     if (P->nchans > 1)
00658         sprintf(chanlabel, ".ch%d", P->whichchan);
00659 
00660     if (P->is_batch) {
00661         if (infilename != NULL) {
00662             *infilename = string_join(P->wavdir, "/",
00663                                       fileroot, ".",
00664                                       P->wavext, NULL);
00665         }
00666 
00667         if (outfilename != NULL) {
00668             if (P->nchans > 1)
00669                 *outfilename = string_join(P->cepdir, "/",
00670                                            fileroot, chanlabel,
00671                                            ".", P->cepext, NULL);
00672             else
00673                 *outfilename = string_join(P->cepdir, "/",
00674                                            fileroot, ".",
00675                                            P->cepext, NULL);
00676         }
00677     }
00678     else if (P->is_single) {
00679         if (infilename != NULL) {
00680             *infilename = ckd_salloc(P->wavfile);
00681         }
00682         if (outfilename != NULL) {
00683             *outfilename = ckd_salloc(P->cepfile);
00684         }
00685     }
00686     else {
00687         E_FATAL("Unspecified Batch or Single Mode\n");
00688     }
00689 
00690     return 0;
00691 }
00692 
00693 int32
00694 fe_openfiles(globals_t * P, fe_t * FE, char *infile, int32 * fp_in,
00695              int32 * nsamps, int32 * nframes, int32 * nblocks,
00696              char *outfile, int32 * fp_out)
00697 {
00698     struct stat filestats;
00699     int fp = 0, len = 0, outlen, numframes, numblocks;
00700     FILE *fp2;
00701     char line[MAXCHARS];
00702     int got_it = 0;
00703 
00704 
00705     /* Note: this is kind of a hack to read the byte format from the
00706        NIST header */
00707     if (P->input_format == NIST) {
00708         if ((fp2 = fopen(infile, "rb")) == NULL) {
00709             E_ERROR_SYSTEM("Cannot read %s", infile);
00710             return (FE_INPUT_FILE_READ_ERROR);
00711         }
00712         *line = 0;
00713         got_it = 0;
00714         while (strcmp(line, "end_head") && !got_it) {
00715             fscanf(fp2, "%s", line);
00716             if (!strcmp(line, "sample_byte_format")) {
00717                 fscanf(fp2, "%s", line);
00718                 if (!strcmp(line, "-s2")) {
00719                     fscanf(fp2, "%s", line);
00720                     if (!strcmp(line, "01")) {
00721                         P->input_endian = LITTLE;
00722                         got_it = 1;
00723                     }
00724                     else if (!strcmp(line, "10")) {
00725                         P->input_endian = BIG;
00726                         got_it = 1;
00727                     }
00728                     else
00729                         E_ERROR("Unknown/unsupported byte order\n");
00730                 }
00731                 else
00732                     E_ERROR("Error determining byte format\n");
00733             }
00734         }
00735         if (!got_it) {
00736             E_WARN
00737                 ("Can't find byte format in header, setting to machine's endian\n");
00738             P->input_endian = P->machine_endian;
00739         }
00740         fclose(fp2);
00741     }
00742     else if (P->input_format == RAW) {
00743         /*
00744            P->input_endian = P->machine_endian;
00745          */
00746     }
00747     else if (P->input_format == MSWAV) {
00748         P->input_endian = LITTLE;       // Default for MS WAV riff files
00749     }
00750 
00751     /* FIXME: Why aren't we using stdio here??? */
00752     if ((fp = open(infile, O_RDONLY | O_BINARY, 0644)) < 0) {
00753         fprintf(stderr, "Cannot open %s\n", infile);
00754         return (FE_INPUT_FILE_OPEN_ERROR);
00755     }
00756     else {
00757         if (fstat(fp, &filestats) != 0)
00758             printf("fstat failed\n");
00759 
00760         if (P->input_format == NIST) {
00761             short *hdr_buf;
00762 
00763             len = (filestats.st_size - HEADER_BYTES) / sizeof(short);
00764             /* eat header */
00765             hdr_buf =
00766                 (short *) calloc(HEADER_BYTES / sizeof(short),
00767                                  sizeof(short));
00768             if (read(fp, hdr_buf, HEADER_BYTES) != HEADER_BYTES) {
00769                 E_ERROR("Cannot read %s\n", infile);
00770                 return (FE_INPUT_FILE_READ_ERROR);
00771             }
00772             free(hdr_buf);
00773         }
00774         else if (P->input_format == RAW) {
00775             len = filestats.st_size / sizeof(int16);
00776         }
00777         else if (P->input_format == MSWAV) {
00778             /* Read the header */
00779             MSWAV_hdr *hdr_buf = NULL;
00780             /* MC: read till just before datatag */
00781             const int hdr_len_to_read = ((char *) (&hdr_buf->datatag))
00782                 - (char *) hdr_buf;
00783             int data_start;
00784 
00785             if ((hdr_buf =
00786                  (MSWAV_hdr *) calloc(1, sizeof(MSWAV_hdr))) == NULL) {
00787                 E_ERROR("Cannot allocate for input file header\n");
00788                 return (FE_INPUT_FILE_READ_ERROR);
00789             }
00790             if (read(fp,hdr_buf,hdr_len_to_read) != hdr_len_to_read){
00791                 E_ERROR("Cannot allocate for input file header\n");
00792                 return (FE_INPUT_FILE_READ_ERROR);
00793             }
00794             /* Check header */
00795             if (strncmp(hdr_buf->rifftag, "RIFF", 4) != 0 ||
00796                 strncmp(hdr_buf->wavefmttag, "WAVEfmt", 7) != 0) {
00797                 E_ERROR("Error in mswav file header\n");
00798                 return (FE_INPUT_FILE_READ_ERROR);
00799             }
00800             {
00801                 /* There may be other "chunks" before the data chunk,
00802                  * which we can ignore. We have to find the start of
00803                  * the data chunk, which begins with the string
00804                  * "data".
00805                  */
00806                 int16 found = 0;
00807                 char readChar;
00808                 char *dataString = "data";
00809                 int16 charPointer = 0;
00810                 while (!found) {
00811                     if (read(fp, &readChar, sizeof(char)) != sizeof(char)) {
00812                         E_ERROR("Failed reading wav file.\n");
00813                         return (FE_INPUT_FILE_READ_ERROR);
00814                     }
00815                     if (readChar == dataString[charPointer]) {
00816                         charPointer++;
00817                     }
00818                     if (charPointer == (int) strlen(dataString)) {
00819                         found = 1;
00820                         strcpy(hdr_buf->datatag, dataString);
00821                         if (read
00822                             (fp,
00823                              &(hdr_buf->
00824                                datalength),
00825                              sizeof(int32)) != sizeof(int32)) {
00826                             E_ERROR("Failed reading wav file.\n");
00827                             return (FE_INPUT_FILE_READ_ERROR);
00828                         }
00829                     }
00830                 }
00831             }
00832             data_start = lseek(fp, 0, SEEK_CUR);
00833             if (P->input_endian != P->machine_endian) { // If machine is Big Endian
00834                 hdr_buf->datalength = SWAP_INT32(&(hdr_buf->datalength));
00835                 hdr_buf->data_format = SWAP_INT16(&(hdr_buf->data_format));
00836                 hdr_buf->numchannels = SWAP_INT16(&(hdr_buf->numchannels));
00837                 hdr_buf->BitsPerSample =
00838                     SWAP_INT16(&(hdr_buf->BitsPerSample));
00839                 hdr_buf->SamplingFreq =
00840                     SWAP_INT32(&(hdr_buf->SamplingFreq));
00841                 hdr_buf->BytesPerSec = SWAP_INT32(&(hdr_buf->BytesPerSec));
00842             }
00843             /* Check Format */
00844             if (hdr_buf->data_format != 1 || hdr_buf->BitsPerSample != 16) {
00845                 E_ERROR("MS WAV file not in 16-bit PCM format\n");
00846                 return (FE_INPUT_FILE_READ_ERROR);
00847             }
00848             /* This number may be bogus.  Check for a truncated file. */
00849             len = hdr_buf->datalength / sizeof(short);
00850             if (len > (filestats.st_size - data_start) / sizeof(short))
00851                 len = (filestats.st_size - data_start) / sizeof(short);
00852             
00853             P->nchans = hdr_buf->numchannels;
00854             /* DEBUG: Dump Info */
00855             if (P->verbose) {
00856                 E_INFO("Reading MS Wav file %s:\n", infile);
00857                 E_INFO
00858                     ("\t16 bit PCM data, %d channels %d samples\n",
00859                      P->nchans, len);
00860                 E_INFO("\tSampled at %d\n", hdr_buf->SamplingFreq);
00861             }
00862             free(hdr_buf);
00863         }
00864         else {
00865             E_ERROR("Unknown input file format\n");
00866             return (FE_INPUT_FILE_OPEN_ERROR);
00867         }
00868     }
00869 
00870 
00871     len = len / P->nchans;
00872     *nsamps = len;
00873     *fp_in = fp;
00874 
00875     numblocks = (int) ((float) len / (float) P->blocksize);
00876     if (numblocks * P->blocksize < len)
00877         numblocks++;
00878 
00879     *nblocks = numblocks;
00880 
00881     if ((fp =
00882          open(outfile, O_CREAT | O_WRONLY | O_TRUNC | O_BINARY,
00883               0644)) < 0) {
00884         E_ERROR("Unable to open %s for writing features\n", outfile);
00885         return (FE_OUTPUT_FILE_OPEN_ERROR);
00886     }
00887     else {
00888         size_t nsamps = len;
00889         int frame_shift, frame_size;
00890 
00891         /* Compute number of frames and write cepfile header */
00892         fe_process_frames(FE, NULL, &nsamps, NULL, &numframes);
00893         /* This is sort of hacky... we need to figure out if there
00894            will be a trailing frame from fe_end_utt() or not.  */
00895         fe_get_input_size(FE, &frame_shift, &frame_size);
00896         /* Don't ask me why this has to be <= rather than <, it just does... */
00897         if (frame_size + (numframes - 1) * frame_shift <= len)
00898             ++numframes;
00899 
00900         outlen = numframes * fe_get_output_size(FE);
00901         if (P->output_endian != P->machine_endian)
00902             SWAP_INT32(&outlen);
00903         if (write(fp, &outlen, 4) != 4) {
00904             E_ERROR("Data write error on %s\n", outfile);
00905             close(fp);
00906             return (FE_OUTPUT_FILE_WRITE_ERROR);
00907         }
00908         if (P->output_endian != P->machine_endian)
00909             SWAP_INT32(&outlen);
00910     }
00911 
00912     *nframes = numframes;
00913     *fp_out = fp;
00914 
00915     return 0;
00916 }
00917 
00918 int32
00919 fe_readblock_spch(globals_t * P, int32 fp, int32 nsamps, int16 * buf)
00920 {
00921     int32 bytes_read, cum_bytes_read, nreadbytes, actsamps, offset, i,
00922         j, k;
00923     int16 *tmpbuf;
00924     int32 nchans, whichchan;
00925 
00926     nchans = P->nchans;
00927     whichchan = P->whichchan;
00928 
00929     if (nchans == 1) {
00930         if (P->input_format == RAW
00931             || P->input_format == NIST
00932             || P->input_format == MSWAV) {
00933             nreadbytes = nsamps * sizeof(int16);
00934             if ((bytes_read = read(fp, buf, nreadbytes)) != nreadbytes) {
00935                 E_ERROR_SYSTEM("error reading block: %ld != %d",
00936                                bytes_read, nreadbytes);
00937                 return (0);
00938             }
00939         }
00940         else {
00941             E_ERROR("unknown input file format\n");
00942             return (0);
00943         }
00944         cum_bytes_read = bytes_read;
00945     }
00946     else if (nchans > 1) {
00947 
00948         if (nsamps < P->blocksize) {
00949             actsamps = nsamps * nchans;
00950             tmpbuf = (int16 *) calloc(nsamps * nchans, sizeof(int16));
00951             cum_bytes_read = 0;
00952             if (P->input_format == RAW
00953                 || P->input_format == MSWAV || P->input_format == NIST) {
00954 
00955                 k = 0;
00956                 nreadbytes = actsamps * sizeof(int16);
00957 
00958                 if ((bytes_read =
00959                      read(fp, tmpbuf, nreadbytes)) != nreadbytes) {
00960                     E_ERROR
00961                         ("error reading block (got %d not %d)\n",
00962                          bytes_read, nreadbytes);
00963                     return (0);
00964                 }
00965 
00966                 for (j = whichchan - 1; j < actsamps; j = j + nchans) {
00967                     buf[k] = tmpbuf[j];
00968                     k++;
00969                 }
00970                 cum_bytes_read += bytes_read / nchans;
00971             }
00972             else {
00973                 E_ERROR("unknown input file format\n");
00974                 return (0);
00975             }
00976             free(tmpbuf);
00977         }
00978         else {
00979             tmpbuf = (int16 *) calloc(nsamps, sizeof(int16));
00980             actsamps = nsamps / nchans;
00981             cum_bytes_read = 0;
00982 
00983             if (actsamps * nchans != nsamps) {
00984                 E_WARN
00985                     ("Blocksize %d is not an integer multiple of Number of channels %d\n",
00986                      nsamps, nchans);
00987             }
00988 
00989             if (P->input_format == RAW
00990                 || P->input_format == MSWAV || P->input_format == NIST) {
00991                 for (i = 0; i < nchans; i++) {
00992 
00993                     offset = i * actsamps;
00994                     k = 0;
00995                     nreadbytes = nsamps * sizeof(int16);
00996 
00997                     if ((bytes_read =
00998                          read(fp, tmpbuf, nreadbytes)) != nreadbytes) {
00999                         E_ERROR
01000                             ("error reading block (got %d not %d)\n",
01001                              bytes_read, nreadbytes);
01002                         return (0);
01003                     }
01004 
01005                     for (j = whichchan - 1; j < nsamps; j = j + nchans) {
01006                         buf[offset + k] = tmpbuf[j];
01007                         k++;
01008                     }
01009                     cum_bytes_read += bytes_read / nchans;
01010                 }
01011             }
01012             else {
01013                 E_ERROR("unknown input file format\n");
01014                 return (0);
01015             }
01016             free(tmpbuf);
01017         }
01018     }
01019 
01020     else {
01021         E_ERROR("unknown number of channels!\n");
01022         return (0);
01023     }
01024 
01025     if (P->input_endian != P->machine_endian) {
01026         for (i = 0; i < nsamps; i++)
01027             SWAP_INT16(&buf[i]);
01028     }
01029 
01030     return (cum_bytes_read / sizeof(int16));
01031 
01032 }
01033 
01034 int32
01035 fe_writeblock_feat(globals_t * P, fe_t * FE, int32 fp, int32 nframes,
01036                    mfcc_t ** feat)
01037 {
01038 
01039     int32 i, length, nwritebytes;
01040     float32 **ffeat;
01041 
01042     length = nframes * fe_get_output_size(FE);
01043 
01044     ffeat = (float32 **) feat;
01045     fe_mfcc_to_float(FE, feat, ffeat, nframes);
01046     if (P->output_endian != P->machine_endian) {
01047         for (i = 0; i < length; ++i)
01048             SWAP_FLOAT32(ffeat[0] + i);
01049     }
01050 
01051     nwritebytes = length * sizeof(float32);
01052     if (write(fp, ffeat[0], nwritebytes) != nwritebytes) {
01053         close(fp);
01054         E_FATAL("Error writing block of features\n");
01055     }
01056 
01057     return (length);
01058 }
01059 
01060 
01061 int32
01062 fe_closefiles(int32 fp_in, int32 fp_out)
01063 {
01064     close(fp_in);
01065     close(fp_out);
01066     return 0;
01067 }
01068 
01069 int32
01070 fe_convert_with_dct(globals_t * P, fe_t * FE, char *infile, char *outfile)
01071 {
01072     FILE *ifh, *ofh;
01073     int32 ifsize, nfloats, swap = 0;
01074     int32 input_ncoeffs, output_ncoeffs;
01075     float32 *logspec;
01076 
01077     if ((ifh = fopen(infile, "rb")) == NULL) {
01078         E_ERROR_SYSTEM("Cannot read %s", infile);
01079         return (FE_INPUT_FILE_READ_ERROR);
01080     }
01081     if ((ofh = fopen(outfile, "wb")) == NULL) {
01082         E_ERROR_SYSTEM("Unable to open %s for writing features", outfile);
01083         return (FE_OUTPUT_FILE_OPEN_ERROR);
01084     }
01085 
01086     fseek(ifh, 0, SEEK_END);
01087     ifsize = ftell(ifh);
01088     fseek(ifh, 0, SEEK_SET);
01089     fread(&nfloats, 4, 1, ifh);
01090     if (nfloats != ifsize / 4 - 1) {
01091         E_INFO("Will byteswap %s (%x != %x)\n",
01092                infile, nfloats, ifsize / 4 - 1);
01093         SWAP_INT32(&nfloats);
01094         swap = 1;
01095     }
01096     if (nfloats != ifsize / 4 - 1) {
01097         E_ERROR("Size of file doesn't match header: %d != %d\n",
01098                 nfloats, ifsize / 4 - 1);
01099         return (FE_INPUT_FILE_READ_ERROR);
01100     }
01101     if (P->convert == CEP2SPEC) {
01102         input_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01103         output_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01104     }
01105     else {
01106         input_ncoeffs = cmd_ln_int32_r(P->config, "-nfilt");
01107         output_ncoeffs = cmd_ln_int32_r(P->config, "-ncep");
01108     }
01109     nfloats = nfloats * output_ncoeffs / input_ncoeffs;
01110 
01111     if (swap)
01112         SWAP_INT32(&nfloats);
01113     fwrite(&nfloats, 4, 1, ofh);
01114     /* Always use the largest size since it's done inplace */
01115     logspec = ckd_calloc(cmd_ln_int32_r(P->config, "-nfilt"),
01116                          sizeof(*logspec));
01117 
01118     while (fread(logspec, 4, input_ncoeffs, ifh) == input_ncoeffs) {
01119         int32 i;
01120         if (swap) {
01121             for (i = 0; i < input_ncoeffs; ++i) {
01122                 SWAP_FLOAT32(logspec + i);
01123             }
01124         }
01125         fe_float_to_mfcc(FE, &logspec, (mfcc_t **)&logspec, 1);
01126         if (P->convert == CEP2SPEC) {
01127             fe_mfcc_dct3(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01128         }
01129         else {
01130             if (0 == strcmp(cmd_ln_str_r(P->config, "-transform"), "legacy"))
01131                 fe_logspec_to_mfcc(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01132             else
01133                 fe_logspec_dct2(FE, (mfcc_t *)logspec, (mfcc_t *)logspec);
01134         }
01135         fe_mfcc_to_float(FE, (mfcc_t **)&logspec, &logspec, 1);
01136         if (swap) {
01137             for (i = 0; i < output_ncoeffs; ++i) {
01138                 SWAP_FLOAT32(logspec + i);
01139             }
01140         }
01141         if (fwrite(logspec, 4, output_ncoeffs, ofh) < output_ncoeffs) {
01142             E_ERROR_SYSTEM("Failed to write %d coeffs to %s",
01143                            output_ncoeffs, outfile);
01144             ckd_free(logspec);
01145             return (FE_OUTPUT_FILE_WRITE_ERROR);
01146         }
01147     }
01148     if (!feof(ifh)) {
01149         E_ERROR("Short read in input file %s\n", infile);
01150         ckd_free(logspec);
01151         return (FE_INPUT_FILE_READ_ERROR);
01152     }
01153     fclose(ifh);
01154     fclose(ofh);
01155     ckd_free(logspec);
01156 
01157     return FE_SUCCESS;
01158 }
01159 
01160 /*
01161  * Log record.  Maintained by RCS.
01162  *
01163  * $Log: wave2feat.c,v $
01164  * Revision 1.35  2006/02/25 00:53:48  egouvea
01165  * Added the flag "-seed". If dither is being used and the seed is less
01166  * than zero, the random number generator is initialized with time(). If
01167  * it is at least zero, it's initialized with the provided seed. This way
01168  * we have the benefit of having dither, and the benefit of being
01169  * repeatable.
01170  *
01171  * This is consistent with what sphinx3 does. Well, almost. The random
01172  * number generator is still what the compiler provides.
01173  *
01174  * Also, moved fe_init_params to fe_interface.c, so one can initialize a
01175  * variable of type param_t with meaningful values.
01176  *
01177  * Revision 1.34  2006/02/20 23:55:51  egouvea
01178  * Moved fe_dither() to the "library" side rather than the app side, so
01179  * the function can be called when using the front end as a library.
01180  *
01181  * Revision 1.33  2006/02/17 00:31:34  egouvea
01182  * Removed switch -melwarp. Changed the default for window length to
01183  * 0.025625 from 0.256 (so that a window at 16kHz sampling rate has
01184  * exactly 410 samples). Cleaned up include's. Replaced some E_FATAL()
01185  * with E_WARN() and return.
01186  *
01187  * Revision 1.32  2006/02/16 20:11:20  egouvea
01188  * Fixed the code that prints a warning if any zero-energy frames are
01189  * found, and recommending the user to add dither. Previously, it would
01190  * only report the zero energy frames if they happened in the last
01191  * utterance. Now, it reports for each utterance.
01192  *
01193  * Revision 1.31  2006/02/16 00:18:26  egouvea
01194  * Implemented flexible warping function. The user can specify at run
01195  * time which of several shapes they want to use. Currently implemented
01196  * are an affine function (y = ax + b), an inverse linear (y = a/x) and a
01197  * piecewise linear (y = ax, up to a frequency F, and then it "breaks" so
01198  * Nyquist frequency matches in both scales).
01199  *
01200  * Added two switches, -warp_type and -warp_params. The first specifies
01201  * the type, with valid values:
01202  *
01203  * -inverse or inverse_linear
01204  * -linear or affine
01205  * -piecewise or piecewise_linear
01206  *
01207  * The inverse_linear is the same as implemented by EHT. The -mel_warp
01208  * switch was kept for compatibility (maybe remove it in the
01209  * future?). The code is compatible with EHT's changes: cepstra created
01210  * from code after his changes should be the same as now. Scripts that
01211  * worked with his changes should work now without changes. Tested a few
01212  * cases, same results.
01213  *
01214  */

Generated on Fri Jan 14 2011 for SphinxBase by  doxygen 1.7.1