/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ /* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * This work was supported in part by funding from the Defense Advanced * Research Projects Agency and the National Science Foundation of the * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #if defined(_WIN32) && !defined(CYGWIN) #include #endif #include "sphinxbase/pio.h" #include "sphinxbase/filename.h" #include "sphinxbase/err.h" #include "sphinxbase/strfuncs.h" #include "sphinxbase/ckd_alloc.h" #ifndef EXEEXT #define EXEEXT "" #endif enum { COMP_NONE, COMP_COMPRESS, COMP_GZIP, COMP_BZIP2 }; static void guess_comptype(char const *file, int32 *ispipe, int32 *isgz) { size_t k; k = strlen(file); *ispipe = 0; *isgz = COMP_NONE; if ((k > 2) && ((strcmp(file + k - 2, ".Z") == 0) || (strcmp(file + k - 2, ".z") == 0))) { *ispipe = 1; *isgz = COMP_COMPRESS; } else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0) || (strcmp(file + k - 3, ".GZ") == 0))) { *ispipe = 1; *isgz = COMP_GZIP; } else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0) || (strcmp(file + k - 4, ".BZ2") == 0))) { *ispipe = 1; *isgz = COMP_BZIP2; } } FILE * fopen_comp(const char *file, const char *mode, int32 * ispipe) { FILE *fp; #ifndef HAVE_POPEN *ispipe = 0; /* No popen() on WinCE */ #else /* HAVE_POPEN */ int32 isgz; guess_comptype(file, ispipe, &isgz); #endif /* HAVE_POPEN */ if (*ispipe) { #ifndef HAVE_POPEN /* Shouldn't get here, anyway */ E_FATAL("No popen() on WinCE\n"); #else if (strcmp(mode, "r") == 0) { char *command; switch (isgz) { case COMP_GZIP: command = string_join("gunzip" EXEEXT, " -c ", file, NULL); break; case COMP_COMPRESS: command = string_join("zcat" EXEEXT, " ", file, NULL); break; case COMP_BZIP2: command = string_join("bunzip2" EXEEXT, " -c ", file, NULL); break; default: command = NULL; /* Make compiler happy. */ E_FATAL("Unknown compression type %d\n", isgz); } if ((fp = popen(command, mode)) == NULL) { E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); ckd_free(command); return NULL; } ckd_free(command); } else if (strcmp(mode, "w") == 0) { char *command; switch (isgz) { case COMP_GZIP: command = string_join("gzip" EXEEXT, " > ", file, NULL); break; case COMP_COMPRESS: command = string_join("compress" EXEEXT, " -c > ", file, NULL); break; case COMP_BZIP2: command = string_join("bzip2" EXEEXT, " > ", file, NULL); break; default: command = NULL; /* Make compiler happy. */ E_FATAL("Unknown compression type %d\n", isgz); } if ((fp = popen(command, mode)) == NULL) { E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); ckd_free(command); return NULL; } ckd_free(command); } else { E_ERROR("Compressed file operation for mode %s is not supported", mode); return NULL; } #endif /* HAVE_POPEN */ } else { fp = fopen(file, mode); } return (fp); } void fclose_comp(FILE * fp, int32 ispipe) { if (ispipe) { #ifdef HAVE_POPEN #if defined(_WIN32) && (!defined(__SYMBIAN32__)) _pclose(fp); #else pclose(fp); #endif #endif } else fclose(fp); } FILE * fopen_compchk(const char *file, int32 * ispipe) { #ifndef HAVE_POPEN *ispipe = 0; /* No popen() on WinCE */ /* And therefore the rest of this function is useless. */ return (fopen_comp(file, "r", ispipe)); #else /* HAVE_POPEN */ int32 isgz; FILE *fh; /* First just try to fopen_comp() it */ if ((fh = fopen_comp(file, "r", ispipe)) != NULL) return fh; else { char *tmpfile; size_t k; /* File doesn't exist; try other compressed/uncompressed form, as appropriate */ guess_comptype(file, ispipe, &isgz); k = strlen(file); tmpfile = ckd_calloc(k+5, 1); strcpy(tmpfile, file); switch (isgz) { case COMP_GZIP: tmpfile[k - 3] = '\0'; break; case COMP_BZIP2: tmpfile[k - 4] = '\0'; break; case COMP_COMPRESS: tmpfile[k - 2] = '\0'; break; case COMP_NONE: strcpy(tmpfile + k, ".gz"); if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { E_WARN("Using %s instead of %s\n", tmpfile, file); ckd_free(tmpfile); return fh; } strcpy(tmpfile + k, ".bz2"); if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { E_WARN("Using %s instead of %s\n", tmpfile, file); ckd_free(tmpfile); return fh; } strcpy(tmpfile + k, ".Z"); if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { E_WARN("Using %s instead of %s\n", tmpfile, file); ckd_free(tmpfile); return fh; } ckd_free(tmpfile); return NULL; } E_WARN("Using %s instead of %s\n", tmpfile, file); fh = fopen_comp(tmpfile, "r", ispipe); ckd_free(tmpfile); return NULL; } #endif /* HAVE_POPEN */ } lineiter_t * lineiter_start(FILE *fh) { lineiter_t *li; li = (lineiter_t *)ckd_calloc(1, sizeof(*li)); li->buf = (char *)ckd_malloc(128); li->buf[0] = '\0'; li->bsiz = 128; li->len = 0; li->fh = fh; li = lineiter_next(li); /* Strip the UTF-8 BOM */ if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) { memmove(li->buf, li->buf + 3, strlen(li->buf + 1)); li->len -= 3; } return li; } lineiter_t * lineiter_start_clean(FILE *fh) { lineiter_t *li; li = lineiter_start(fh); if (li == NULL) return li; li->clean = TRUE; if (li->buf && li->buf[0] == '#') { li = lineiter_next(li); } else { string_trim(li->buf, STRING_BOTH); } return li; } static lineiter_t * lineiter_next_plain(lineiter_t *li) { /* We are reading the next line */ li->lineno++; /* Read a line and check for EOF. */ if (fgets(li->buf, li->bsiz, li->fh) == NULL) { lineiter_free(li); return NULL; } /* If we managed to read the whole thing, then we are done * (this will be by far the most common result). */ li->len = (int32)strlen(li->buf); if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') return li; /* Otherwise we have to reallocate and keep going. */ while (1) { li->bsiz *= 2; li->buf = (char *)ckd_realloc(li->buf, li->bsiz); /* If we get an EOF, we are obviously done. */ if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) { li->len += strlen(li->buf + li->len); return li; } li->len += strlen(li->buf + li->len); /* If we managed to read the whole thing, then we are done. */ if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') return li; } /* Shouldn't get here. */ return li; } lineiter_t * lineiter_next(lineiter_t *li) { if (!li->clean) return lineiter_next_plain(li); for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) { if (li->buf && li->buf[0] != '#') { li->buf = string_trim(li->buf, STRING_BOTH); break; } } return li; } int lineiter_lineno(lineiter_t *li) { return li->lineno; } void lineiter_free(lineiter_t *li) { if (li == NULL) return; ckd_free(li->buf); ckd_free(li); } char * fread_line(FILE *stream, size_t *out_len) { char *output, *outptr; char buf[128]; output = outptr = NULL; while (fgets(buf, sizeof(buf), stream)) { size_t len = strlen(buf); /* Append this data to the buffer. */ if (output == NULL) { output = (char *)ckd_malloc(len + 1); outptr = output; } else { size_t cur = outptr - output; output = (char *)ckd_realloc(output, cur + len + 1); outptr = output + cur; } memcpy(outptr, buf, len + 1); outptr += len; /* Stop on a short read or end of line. */ if (len < sizeof(buf)-1 || buf[len-1] == '\n') break; } if (out_len) *out_len = outptr - output; return output; } #define FREAD_RETRY_COUNT 60 int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream) { char *data; size_t n_items_read; size_t n_items_rem; uint32 n_retry_rem; int32 loc; n_retry_rem = FREAD_RETRY_COUNT; data = (char *)pointer; loc = 0; n_items_rem = num_items; do { n_items_read = fread(&data[loc], size, n_items_rem, stream); n_items_rem -= n_items_read; if (n_items_rem > 0) { /* an incomplete read occurred */ if (n_retry_rem == 0) return -1; if (n_retry_rem == FREAD_RETRY_COUNT) { E_ERROR_SYSTEM("fread() failed; retrying...\n"); } --n_retry_rem; loc += n_items_read * size; #if !defined(_WIN32) && defined(HAVE_UNISTD_H) sleep(1); #endif } } while (n_items_rem > 0); return num_items; } #ifdef _WIN32_WCE /* No stat() on WinCE */ int32 stat_retry(const char *file, struct stat * statbuf) { WIN32_FIND_DATAW file_data; HANDLE *h; wchar_t *wfile; size_t len; len = mbstowcs(NULL, file, 0) + 1; wfile = ckd_calloc(len, sizeof(*wfile)); mbstowcs(wfile, file, len); if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) { ckd_free(wfile); return -1; } ckd_free(wfile); memset(statbuf, 0, sizeof(*statbuf)); statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime; statbuf->st_size = file_data.nFileSizeLow; FindClose(h); return 0; } int32 stat_mtime(const char *file) { struct stat statbuf; if (stat_retry(file, &statbuf) != 0) return -1; return ((int32) statbuf.st_mtime); } #else #define STAT_RETRY_COUNT 10 int32 stat_retry(const char *file, struct stat * statbuf) { int32 i; for (i = 0; i < STAT_RETRY_COUNT; i++) { #ifndef HAVE_SYS_STAT_H FILE *fp; if ((fp = (FILE *)fopen(file, "r")) != 0) { fseek(fp, 0, SEEK_END); statbuf->st_size = ftell(fp); fclose(fp); return 0; } #else /* HAVE_SYS_STAT_H */ if (stat(file, statbuf) == 0) return 0; #endif if (i == 0) { E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file); } #ifdef HAVE_UNISTD_H sleep(1); #endif } return -1; } int32 stat_mtime(const char *file) { struct stat statbuf; #ifdef HAVE_SYS_STAT_H if (stat(file, &statbuf) != 0) return -1; #else /* HAVE_SYS_STAT_H */ if (stat_retry(file, &statbuf) != 0) return -1; #endif /* HAVE_SYS_STAT_H */ return ((int32) statbuf.st_mtime); } #endif /* !_WIN32_WCE */ struct bit_encode_s { FILE *fh; unsigned char buf, bbits; int16 refcount; }; bit_encode_t * bit_encode_attach(FILE *outfh) { bit_encode_t *be; be = (bit_encode_t *)ckd_calloc(1, sizeof(*be)); be->refcount = 1; be->fh = outfh; return be; } bit_encode_t * bit_encode_retain(bit_encode_t *be) { ++be->refcount; return be; } int bit_encode_free(bit_encode_t *be) { if (be == NULL) return 0; if (--be->refcount > 0) return be->refcount; ckd_free(be); return 0; } int bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits) { int tbits; tbits = nbits + be->bbits; if (tbits < 8) { /* Append to buffer. */ be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits)); } else { int i = 0; while (tbits >= 8) { /* Shift bits out of the buffer and splice with high-order bits */ fputc(be->buf | ((bits[i]) >> be->bbits), be->fh); /* Put low-order bits back into buffer */ be->buf = (bits[i] << (8 - be->bbits)) & 0xff; tbits -= 8; ++i; } } /* tbits contains remaining number of bits. */ be->bbits = tbits; return nbits; } int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits) { unsigned char bits[4]; codeword <<= (32 - nbits); bits[0] = (codeword >> 24) & 0xff; bits[1] = (codeword >> 16) & 0xff; bits[2] = (codeword >> 8) & 0xff; bits[3] = codeword & 0xff; return bit_encode_write(be, bits, nbits); } int bit_encode_flush(bit_encode_t *be) { if (be->bbits) { fputc(be->buf, be->fh); be->bbits = 0; } return 0; } int build_directory(const char *path) { int rv; /* Utterly failed... */ if (strlen(path) == 0) return -1; #if defined(_WIN32) && !defined(CYGWIN) else if ((rv = _mkdir(path)) == 0) return 0; #elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ else if ((rv = mkdir(path, 0777)) == 0) return 0; #endif /* Or, it already exists... */ else if (errno == EEXIST) return 0; else if (errno != ENOENT) { E_ERROR_SYSTEM("Failed to create %s", path); return -1; } else { char *dirname = ckd_salloc(path); path2dirname(path, dirname); build_directory(dirname); ckd_free(dirname); #if defined(_WIN32) && !defined(CYGWIN) return _mkdir(path); #elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ return mkdir(path, 0777); #endif } }