SphinxBase 0.6

src/libsphinxbase/util/pio.c

00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 #include <config.h>
00039 
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <string.h>
00043 #ifdef HAVE_UNISTD_H
00044 #include <unistd.h>
00045 #endif
00046 #ifdef HAVE_SYS_STAT_H
00047 #include <sys/stat.h>
00048 #endif
00049 #ifdef HAVE_SYS_TYPES_H
00050 #include <sys/types.h>
00051 #endif
00052 #include <assert.h>
00053 
00054 #include "sphinxbase/pio.h"
00055 #include "sphinxbase/filename.h"
00056 #include "sphinxbase/err.h"
00057 #include "sphinxbase/strfuncs.h"
00058 #include "sphinxbase/ckd_alloc.h"
00059 
00060 #ifndef EXEEXT
00061 #define EXEEXT ""
00062 #endif
00063 
/* Compression schemes that guess_comptype() can report for a file name. */
enum {
    COMP_NONE = 0,      /* no recognized compression suffix */
    COMP_COMPRESS = 1,  /* ".Z" or ".z" suffix */
    COMP_GZIP = 2,      /* ".gz" or ".GZ" suffix */
    COMP_BZIP2 = 3      /* ".bz2" or ".BZ2" suffix */
};
00070 
00071 static void
00072 guess_comptype(char const *file, int32 *ispipe, int32 *isgz)
00073 {
00074     int k;
00075 
00076     k = strlen(file);
00077     *ispipe = 0;
00078     *isgz = COMP_NONE;
00079     if ((k > 2)
00080         && ((strcmp(file + k - 2, ".Z") == 0)
00081             || (strcmp(file + k - 2, ".z") == 0))) {
00082         *ispipe = 1;
00083         *isgz = COMP_COMPRESS;
00084     }
00085     else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0)
00086                         || (strcmp(file + k - 3, ".GZ") == 0))) {
00087         *ispipe = 1;
00088         *isgz = COMP_GZIP;
00089     }
00090     else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0)
00091                         || (strcmp(file + k - 4, ".BZ2") == 0))) {
00092         *ispipe = 1;
00093         *isgz = COMP_BZIP2;
00094     }
00095 }
00096 
00097 FILE *
00098 fopen_comp(const char *file, const char *mode, int32 * ispipe)
00099 {
00100     FILE *fp;
00101 
00102 #ifndef HAVE_POPEN
00103     *ispipe = 0; /* No popen() on WinCE */
00104 #else /* HAVE_POPEN */
00105     int32 isgz;
00106     guess_comptype(file, ispipe, &isgz);
00107 #endif /* HAVE_POPEN */
00108 
00109     if (*ispipe) {
00110 #ifndef HAVE_POPEN
00111         /* Shouldn't get here, anyway */
00112         E_FATAL("No popen() on WinCE\n");
00113 #else
00114         if (strcmp(mode, "r") == 0) {
00115             char *command;
00116             switch (isgz) {
00117             case COMP_GZIP:
00118                 command = string_join("gunzip" EXEEXT, " -c ", file, NULL);
00119                 break;
00120             case COMP_COMPRESS:
00121                 command = string_join("zcat" EXEEXT, " ", file, NULL);
00122                 break;
00123             case COMP_BZIP2:
00124                 command = string_join("bunzip2" EXEEXT, " -c ", file, NULL);
00125                 break;
00126             default:
00127                 command = NULL; /* Make compiler happy. */
00128                 E_FATAL("Unknown  compression type %d\n", isgz);
00129             }
00130             if ((fp = popen(command, mode)) == NULL) {
00131                 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
00132                 ckd_free(command);
00133                 return NULL;
00134             }
00135             ckd_free(command);
00136         }
00137         else if (strcmp(mode, "w") == 0) {
00138             char *command;
00139             switch (isgz) {
00140             case COMP_GZIP:
00141                 command = string_join("gzip" EXEEXT, " > ", file, NULL);
00142                 break;
00143             case COMP_COMPRESS:
00144                 command = string_join("compress" EXEEXT, " -c > ", file, NULL);
00145                 break;
00146             case COMP_BZIP2:
00147                 command = string_join("bzip2" EXEEXT, " > ", file, NULL);
00148                 break;
00149             default:
00150                 command = NULL; /* Make compiler happy. */
00151                 E_FATAL("Unknown compression type %d\n", isgz);
00152             }
00153             if ((fp = popen(command, mode)) == NULL) {
00154                 E_ERROR_SYSTEM("popen (%s,%s) failed\n", command, mode);
00155                 ckd_free(command);
00156                 return NULL;
00157             }
00158             ckd_free(command);
00159         }
00160         else {
00161             E_ERROR("fopen_comp not implemented for mode = %s\n", mode);
00162             return NULL;
00163         }
00164 #endif /* HAVE_POPEN */
00165     }
00166     else {
00167         fp = fopen(file, mode);
00168     }
00169 
00170     return (fp);
00171 }
00172 
00173 
00174 void
00175 fclose_comp(FILE * fp, int32 ispipe)
00176 {
00177     if (ispipe) {
00178 #ifdef HAVE_POPEN
00179 #if defined(_WIN32) && (!defined(__SYMBIAN32__))
00180         _pclose(fp);
00181 #else
00182         pclose(fp);
00183 #endif
00184 #endif
00185     }
00186     else
00187         fclose(fp);
00188 }
00189 
00190 
00191 FILE *
00192 fopen_compchk(const char *file, int32 * ispipe)
00193 {
00194 #ifndef HAVE_POPEN
00195     *ispipe = 0; /* No popen() on WinCE */
00196     /* And therefore the rest of this function is useless. */
00197     return (fopen_comp(file, "r", ispipe));
00198 #else /* HAVE_POPEN */
00199     int32 isgz;
00200     FILE *fh;
00201 
00202     /* First just try to fopen_comp() it */
00203     if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
00204         return fh;
00205     else {
00206         char *tmpfile;
00207         int k;
00208 
00209         /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
00210         guess_comptype(file, ispipe, &isgz);
00211         k = strlen(file);
00212         tmpfile = ckd_calloc(k+5, 1);
00213         strcpy(tmpfile, file);
00214         switch (isgz) {
00215         case COMP_GZIP:
00216             tmpfile[k - 3] = '\0';
00217             break;
00218         case COMP_BZIP2:
00219             tmpfile[k - 4] = '\0';
00220             break;
00221         case COMP_COMPRESS:
00222             tmpfile[k - 2] = '\0';
00223             break;
00224         case COMP_NONE:
00225             strcpy(tmpfile + k, ".gz");
00226             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00227                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00228                 ckd_free(tmpfile);
00229                 return fh;
00230             }
00231             strcpy(tmpfile + k, ".bz2");
00232             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00233                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00234                 ckd_free(tmpfile);
00235                 return fh;
00236             }
00237             strcpy(tmpfile + k, ".Z");
00238             if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) {
00239                 E_WARN("Using %s instead of %s\n", tmpfile, file);
00240                 ckd_free(tmpfile);
00241                 return fh;
00242             }
00243             ckd_free(tmpfile);
00244             return NULL;
00245         }
00246         E_WARN("Using %s instead of %s\n", tmpfile, file);
00247         fh = fopen_comp(tmpfile, "r", ispipe);
00248         ckd_free(tmpfile);
00249         return NULL;
00250     }
00251 #endif /* HAVE_POPEN */
00252 }
00253 
00254 lineiter_t *
00255 lineiter_start(FILE *fh)
00256 {
00257     lineiter_t *li;
00258 
00259     li = ckd_calloc(1, sizeof(*li));
00260     li->buf = ckd_malloc(128);
00261     li->buf[0] = '\0';
00262     li->bsiz = 128;
00263     li->len = 0;
00264     li->fh = fh;
00265 
00266     return lineiter_next(li);
00267 }
00268 
00269 lineiter_t *
00270 lineiter_next(lineiter_t *li)
00271 {
00272     /* Read a line and check for EOF. */
00273     if (fgets(li->buf, li->bsiz, li->fh) == NULL) {
00274         lineiter_free(li);
00275         return NULL;
00276     }
00277     /* If we managed to read the whole thing, then we are done
00278      * (this will be by far the most common result). */
00279     li->len = strlen(li->buf);
00280     if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00281         return li;
00282 
00283     /* Otherwise we have to reallocate and keep going. */
00284     while (1) {
00285         li->bsiz *= 2;
00286         li->buf = ckd_realloc(li->buf, li->bsiz);
00287         /* If we get an EOF, we are obviously done. */
00288         if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) {
00289             li->len += strlen(li->buf + li->len);
00290             return li;
00291         }
00292         li->len += strlen(li->buf + li->len);
00293         /* If we managed to read the whole thing, then we are done. */
00294         if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n')
00295             return li;
00296     }
00297 
00298     /* Shouldn't get here. */
00299     return li;
00300 }
00301 
00302 void
00303 lineiter_free(lineiter_t *li)
00304 {
00305     if (li == NULL)
00306         return;
00307     ckd_free(li->buf);
00308     ckd_free(li);
00309 }
00310 
/*
 * Read one line of arbitrary length from stream.
 *
 * stream:  stream to read from.
 * out_len: if non-NULL, receives the length of the returned line (0 when
 *          nothing was read).
 *
 * Returns a newly allocated, NUL-terminated string holding the line
 * (including the trailing newline if one was read), or NULL when nothing
 * could be read.  The caller owns the result and releases it with
 * ckd_free().
 */
char *
fread_line(FILE *stream, size_t *out_len)
{
    char *output = NULL;
    size_t total = 0;   /* characters stored so far, excluding the NUL */
    char buf[128];

    while (fgets(buf, sizeof(buf), stream)) {
        size_t len = strlen(buf);

        /* Make room for this chunk plus the terminator. */
        if (output == NULL)
            output = ckd_malloc(len + 1);
        else
            output = ckd_realloc(output, total + len + 1);

        /* Copy the chunk together with its terminating NUL. */
        memcpy(output + total, buf, len + 1);
        total += len;

        /* Stop on a short read or end of line. */
        if (len < sizeof(buf) - 1 || buf[len - 1] == '\n')
            break;
    }

    /* The length is tracked as a count rather than derived from pointer
     * subtraction, which is undefined when nothing was read and both
     * pointers are NULL. */
    if (out_len)
        *out_len = total;
    return output;
}
00339 
00340 
00341 #define FREAD_RETRY_COUNT       60
00342 
00343 int32
00344 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream)
00345 {
00346     char *data;
00347     uint32 n_items_read;
00348     uint32 n_items_rem;
00349     uint32 n_retry_rem;
00350     int32 loc;
00351 
00352     n_retry_rem = FREAD_RETRY_COUNT;
00353 
00354     data = pointer;
00355     loc = 0;
00356     n_items_rem = num_items;
00357 
00358     do {
00359         n_items_read = fread(&data[loc], size, n_items_rem, stream);
00360 
00361         n_items_rem -= n_items_read;
00362 
00363         if (n_items_rem > 0) {
00364             /* an incomplete read occurred */
00365 
00366             if (n_retry_rem == 0)
00367                 return -1;
00368 
00369             if (n_retry_rem == FREAD_RETRY_COUNT) {
00370                 E_ERROR_SYSTEM("fread() failed; retrying...\n");
00371             }
00372 
00373             --n_retry_rem;
00374 
00375             loc += n_items_read * size;
00376 #ifdef HAVE_UNISTD_H
00377             sleep(1);
00378 #endif
00379         }
00380     } while (n_items_rem > 0);
00381 
00382     return num_items;
00383 }
00384 
00385 
00386 /* Silvio Moioli: updated to use Unicode */
00387 #ifdef _WIN32_WCE /* No stat() on WinCE */
00388 int32
00389 stat_retry(const char *file, struct stat * statbuf)
00390 {
00391     WIN32_FIND_DATAW file_data;
00392     HANDLE *h;
00393     wchar_t *wfile;
00394     size_t len;
00395 
00396     len = mbstowcs(NULL, file, 0) + 1;
00397     wfile = ckd_calloc(len, sizeof(*wfile));
00398     mbstowcs(wfile, file, len);
00399     if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) {
00400         ckd_free(wfile);
00401         return -1;
00402     }
00403     ckd_free(wfile);
00404     memset(statbuf, 0, sizeof(statbuf));
00405     statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime;
00406     statbuf->st_size = file_data.nFileSizeLow;
00407     FindClose(h);
00408 
00409     return 0;
00410 }
00411 
00412 
00413 int32
00414 stat_mtime(const char *file)
00415 {
00416     struct stat statbuf;
00417 
00418     if (stat_retry(file, &statbuf) != 0)
00419         return -1;
00420 
00421     return ((int32) statbuf.st_mtime);
00422 }
00423 #else
00424 #define STAT_RETRY_COUNT        10
00425 int32
00426 stat_retry(const char *file, struct stat * statbuf)
00427 {
00428     int32 i;
00429 
00430     
00431     
00432     for (i = 0; i < STAT_RETRY_COUNT; i++) {
00433 
00434 #ifndef HAVE_SYS_STAT_H
00435                 FILE *fp;
00436 
00437                 if ((fp=(FILE *)fopen(file, "r"))!= 0)
00438                 {
00439                     fseek( fp, 0, SEEK_END);
00440                     statbuf->st_size = ftell( fp );
00441                     fclose(fp);
00442                     return 0;
00443                 }
00444         
00445 #else /* HAVE_SYS_STAT_H */
00446         if (stat(file, statbuf) == 0)
00447             return 0;
00448 #endif
00449         if (i == 0) {
00450             E_ERROR_SYSTEM("stat(%s) failed; retrying...\n", file);
00451         }
00452 #ifdef HAVE_UNISTD_H
00453         sleep(1);
00454 #endif
00455     }
00456 
00457     return -1;
00458 }
00459 
00460 int32
00461 stat_mtime(const char *file)
00462 {
00463     struct stat statbuf;
00464 
00465 #ifdef HAVE_SYS_STAT_H
00466     if (stat(file, &statbuf) != 0)
00467         return -1;
00468 #else /* HAVE_SYS_STAT_H */
00469     if (stat_retry(file, &statbuf) != 0)
00470         return -1;
00471 #endif /* HAVE_SYS_STAT_H */
00472 
00473     return ((int32) statbuf.st_mtime);
00474 }
00475 #endif /* !_WIN32_WCE */
00476 
00477 struct bit_encode_s {
00478     FILE *fh;
00479     unsigned char buf, bbits;
00480     int16 refcount;
00481 };
00482 
00483 bit_encode_t *
00484 bit_encode_attach(FILE *outfh)
00485 {
00486     bit_encode_t *be;
00487 
00488     be = ckd_calloc(1, sizeof(*be));
00489     be->refcount = 1;
00490     be->fh = outfh;
00491     return be;
00492 }
00493 
00494 bit_encode_t *
00495 bit_encode_retain(bit_encode_t *be)
00496 {
00497     ++be->refcount;
00498     return be;
00499 }
00500 
00501 int
00502 bit_encode_free(bit_encode_t *be)
00503 {
00504     if (be == NULL)
00505         return 0;
00506     if (--be->refcount > 0)
00507         return be->refcount;
00508     ckd_free(be);
00509 
00510     return 0;
00511 }
00512 
00513 int
00514 bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits)
00515 {
00516     int tbits;
00517 
00518     tbits = nbits + be->bbits;
00519     if (tbits < 8)  {
00520         /* Append to buffer. */
00521         be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits));
00522     }
00523     else {
00524         int i = 0;
00525         while (tbits >= 8) {
00526             /* Shift bits out of the buffer and splice with high-order bits */
00527             fputc(be->buf | ((bits[i]) >> be->bbits), be->fh);
00528             /* Put low-order bits back into buffer */
00529             be->buf = (bits[i] << (8 - be->bbits)) & 0xff;
00530             tbits -= 8;
00531             ++i;
00532         }
00533     }
00534     /* tbits contains remaining number of  bits. */
00535     be->bbits = tbits;
00536 
00537     return nbits;
00538 }
00539 
00540 int
00541 bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits)
00542 {
00543     unsigned char bits[4];
00544     codeword <<= (32 - nbits);
00545     bits[0] = (codeword >> 24) & 0xff;
00546     bits[1] = (codeword >> 16) & 0xff;
00547     bits[2] = (codeword >> 8) & 0xff;
00548     bits[3] = codeword & 0xff;
00549     return bit_encode_write(be, bits, nbits);
00550 }
00551 
00552 int
00553 bit_encode_flush(bit_encode_t *be)
00554 {
00555     if (be->bbits) {
00556         fputc(be->buf, be->fh);
00557         be->bbits = 0;
00558     }
00559     return 0;
00560 }
00561 
00562 #ifdef HAVE_SYS_STAT_H /* Unix, Cygwin */
/*
 * Create a directory, creating missing parent directories as needed.
 *
 * path: directory to create.
 *
 * Directories are created with mode 0777 (subject to the process umask).
 * A directory that already exists counts as success.
 *
 * Returns 0 on success, -1 on failure (including an empty path).
 */
int
build_directory(const char *path)
{
    char *dirname;

    /* Utterly failed... */
    if (strlen(path) == 0)
        return -1;

    /* Utterly succeeded... */
    if (mkdir(path, 0777) == 0)
        return 0;

    /* Or, it already exists... */
    if (errno == EEXIST)
        return 0;

    /* Anything other than a missing parent cannot be fixed here. */
    if (errno != ENOENT) {
        E_ERROR_SYSTEM("Failed to create %s", path);
        return -1;
    }

    /* A parent is missing: build it first, then retry.  The result of the
     * recursive call is not checked because the mkdir() below fails anyway
     * if the parent could not be created. */
    dirname = ckd_salloc(path);
    path2dirname(path, dirname);
    (void) build_directory(dirname);
    ckd_free(dirname);

    /* EEXIST can occur here too, e.g. when path carries a trailing slash
     * and the recursive call already created the directory. */
    if (mkdir(path, 0777) == 0 || errno == EEXIST)
        return 0;

    E_ERROR_SYSTEM("Failed to create %s", path);
    return -1;
}
00589 #elif defined(_WIN32)
00590 /* FIXME: Implement this. */
/*
 * Placeholder used when <sys/stat.h> is unavailable on _WIN32: directory
 * creation is not implemented.  Logs an error and returns -1.
 */
int
build_directory(const char *path)
{
    (void) path; /* unused until this is implemented */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
00597 #else
/*
 * Placeholder for platforms with neither <sys/stat.h> nor _WIN32:
 * directory creation is not implemented.  Logs an error and returns -1.
 */
int
build_directory(const char *path)
{
    (void) path; /* unused on this platform */

    E_ERROR("build_directory() unimplemented on your platform!\n");
    return -1;
}
00604 #endif