diff options
Diffstat (limited to 'media/pocketsphinx/src/acmod.c')
-rw-r--r-- | media/pocketsphinx/src/acmod.c | 1377 |
1 files changed, 0 insertions, 1377 deletions
diff --git a/media/pocketsphinx/src/acmod.c b/media/pocketsphinx/src/acmod.c deleted file mode 100644 index ba039f70ed..0000000000 --- a/media/pocketsphinx/src/acmod.c +++ /dev/null @@ -1,1377 +0,0 @@ -/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* ==================================================================== - * Copyright (c) 2008 Carnegie Mellon University. All rights - * reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * This work was supported in part by funding from the Defense Advanced - * Research Projects Agency and the National Science Foundation of the - * United States of America, and the CMU Sphinx Speech Consortium. - * - * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND - * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY - * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ==================================================================== - * - */ - - -/** - * @file acmod.c Acoustic model structures for PocketSphinx. - * @author David Huggins-Daines <dhuggins@cs.cmu.edu> - */ - -/* System headers. */ -#include <assert.h> -#include <string.h> -#include <math.h> - -/* SphinxBase headers. */ -#include <sphinxbase/prim_type.h> -#include <sphinxbase/err.h> -#include <sphinxbase/cmd_ln.h> -#include <sphinxbase/strfuncs.h> -#include <sphinxbase/byteorder.h> -#include <sphinxbase/feat.h> -#include <sphinxbase/bio.h> - -/* Local headers. */ -#include "cmdln_macro.h" -#include "acmod.h" -#include "s2_semi_mgau.h" -#include "ptm_mgau.h" -#include "ms_mgau.h" - -/* Feature and front-end parameters that may be in feat.params */ -static const arg_t feat_defn[] = { - waveform_to_cepstral_command_line_macro(), - cepstral_to_feature_command_line_macro(), - CMDLN_EMPTY_OPTION -}; - -#ifndef WORDS_BIGENDIAN -#define WORDS_BIGENDIAN 1 -#endif - -static int32 acmod_process_mfcbuf(acmod_t *acmod); - -static int -acmod_init_am(acmod_t *acmod) -{ - char const *mdeffn, *tmatfn, *mllrfn, *hmmdir; - - /* Read model definition. */ - if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) { - if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL) - E_ERROR("Acoustic model definition is not specified either " - "with -mdef option or with -hmm\n"); - else - E_ERROR("Folder '%s' does not contain acoustic model " - "definition 'mdef'\n", hmmdir); - - return -1; - } - - if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) { - E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn); - return -1; - } - - /* Read transition matrices. */ - if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) { - E_ERROR("No tmat file specified\n"); - return -1; - } - acmod->tmat = tmat_init(tmatfn, acmod->lmath, - cmd_ln_float32_r(acmod->config, "-tmatfloor"), - TRUE); - - /* Read the acoustic models. */ - if ((cmd_ln_str_r(acmod->config, "-mean") == NULL) - || (cmd_ln_str_r(acmod->config, "-var") == NULL) - || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) { - E_ERROR("No mean/var/tmat files specified\n"); - return -1; - } - - if (cmd_ln_str_r(acmod->config, "-senmgau")) { - E_INFO("Using general multi-stream GMM computation\n"); - acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef); - if (acmod->mgau == NULL) - return -1; - } - else { - E_INFO("Attempting to use PTM computation module\n"); - if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) { - E_INFO("Attempting to use semi-continuous computation module\n"); - if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) { - E_INFO("Falling back to general multi-stream GMM computation\n"); - acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef); - if (acmod->mgau == NULL) - return -1; - } - } - } - - /* If there is an MLLR transform, apply it. */ - if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) { - ps_mllr_t *mllr = ps_mllr_read(mllrfn); - if (mllr == NULL) - return -1; - acmod_update_mllr(acmod, mllr); - } - - return 0; -} - -static int -acmod_init_feat(acmod_t *acmod) -{ - acmod->fcb = - feat_init(cmd_ln_str_r(acmod->config, "-feat"), - cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")), - cmd_ln_boolean_r(acmod->config, "-varnorm"), - agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")), - 1, cmd_ln_int32_r(acmod->config, "-ceplen")); - if (acmod->fcb == NULL) - return -1; - - if (cmd_ln_str_r(acmod->config, "-lda")) { - E_INFO("Reading linear feature transformation from %s\n", - cmd_ln_str_r(acmod->config, "-lda")); - if (feat_read_lda(acmod->fcb, - cmd_ln_str_r(acmod->config, "-lda"), - cmd_ln_int32_r(acmod->config, "-ldadim")) < 0) - return -1; - } - - if (cmd_ln_str_r(acmod->config, "-svspec")) { - int32 **subvecs; - E_INFO("Using subvector specification %s\n", - cmd_ln_str_r(acmod->config, "-svspec")); - if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL) - return -1; - if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0) - return -1; - } - - if (cmd_ln_exists_r(acmod->config, "-agcthresh") - && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) { - agc_set_threshold(acmod->fcb->agc_struct, - cmd_ln_float32_r(acmod->config, "-agcthresh")); - } - - if (acmod->fcb->cmn_struct - && cmd_ln_exists_r(acmod->config, "-cmninit")) { - char *c, *cc, *vallist; - int32 nvals; - - vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit")); - c = vallist; - nvals = 0; - while (nvals < acmod->fcb->cmn_struct->veclen - && (cc = strchr(c, ',')) != NULL) { - *cc = '\0'; - acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); - c = cc + 1; - ++nvals; - } - if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') { - acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); - } - ckd_free(vallist); - } - return 0; -} - -int -acmod_fe_mismatch(acmod_t *acmod, fe_t *fe) -{ - /* Output vector dimension needs to be the same. */ - if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) { - E_ERROR("Configured feature length %d doesn't match feature " - "extraction output size %d\n", - cmd_ln_int32_r(acmod->config, "-ceplen"), - fe_get_output_size(fe)); - return TRUE; - } - /* Feature parameters need to be the same. */ - /* ... */ - return FALSE; -} - -int -acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb) -{ - /* Feature type needs to be the same. */ - if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb))) - return TRUE; - /* Input vector dimension needs to be the same. */ - if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb)) - return TRUE; - /* FIXME: Need to check LDA and stuff too. */ - return FALSE; -} - -acmod_t * -acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb) -{ - acmod_t *acmod; - char const *featparams; - - acmod = ckd_calloc(1, sizeof(*acmod)); - acmod->config = cmd_ln_retain(config); - acmod->lmath = lmath; - acmod->state = ACMOD_IDLE; - - /* Look for feat.params in acoustic model dir. */ - if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) { - if (NULL != - cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE)) - E_INFO("Parsed model-specific feature parameters from %s\n", - featparams); - } - - /* Initialize feature computation. */ - if (fe) { - if (acmod_fe_mismatch(acmod, fe)) - goto error_out; - fe_retain(fe); - acmod->fe = fe; - } - else { - /* Initialize a new front end. */ - acmod->fe = fe_init_auto_r(config); - if (acmod->fe == NULL) - goto error_out; - if (acmod_fe_mismatch(acmod, acmod->fe)) - goto error_out; - } - if (fcb) { - if (acmod_feat_mismatch(acmod, fcb)) - goto error_out; - feat_retain(fcb); - acmod->fcb = fcb; - } - else { - /* Initialize a new fcb. */ - if (acmod_init_feat(acmod) < 0) - goto error_out; - } - - /* Load acoustic model parameters. */ - if (acmod_init_am(acmod) < 0) - goto error_out; - - - /* The MFCC buffer needs to be at least as large as the dynamic - * feature window. */ - acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1; - acmod->mfc_buf = (mfcc_t **) - ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize, - sizeof(**acmod->mfc_buf)); - - /* Feature buffer has to be at least as large as MFCC buffer. */ - acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window"); - acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc); - acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos)); - - acmod->utt_start_frame = 0; - - /* Senone computation stuff. */ - acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef), - sizeof(*acmod->senone_scores)); - acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef)); - acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef), - sizeof(*acmod->senone_active)); - acmod->log_zero = logmath_get_zero(acmod->lmath); - acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen"); - return acmod; - -error_out: - acmod_free(acmod); - return NULL; -} - -void -acmod_free(acmod_t *acmod) -{ - if (acmod == NULL) - return; - - feat_free(acmod->fcb); - fe_free(acmod->fe); - cmd_ln_free_r(acmod->config); - - if (acmod->mfc_buf) - ckd_free_2d((void **)acmod->mfc_buf); - if (acmod->feat_buf) - feat_array_free(acmod->feat_buf); - - if (acmod->mfcfh) - fclose(acmod->mfcfh); - if (acmod->rawfh) - fclose(acmod->rawfh); - if (acmod->senfh) - fclose(acmod->senfh); - - ckd_free(acmod->framepos); - ckd_free(acmod->senone_scores); - ckd_free(acmod->senone_active_vec); - ckd_free(acmod->senone_active); - ckd_free(acmod->rawdata); - - if (acmod->mdef) - bin_mdef_free(acmod->mdef); - if (acmod->tmat) - tmat_free(acmod->tmat); - if (acmod->mgau) - ps_mgau_free(acmod->mgau); - if (acmod->mllr) - ps_mllr_free(acmod->mllr); - - ckd_free(acmod); -} - -ps_mllr_t * -acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr) -{ - if (acmod->mllr) - ps_mllr_free(acmod->mllr); - acmod->mllr = mllr; - ps_mgau_transform(acmod->mgau, mllr); - - return mllr; -} - -int -acmod_write_senfh_header(acmod_t *acmod, FILE *logfh) -{ - char nsenstr[64], logbasestr[64]; - - sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef)); - sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath)); - return bio_writehdr(logfh, - "version", "0.1", - "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"), - "n_sen", nsenstr, - "logbase", logbasestr, NULL); -} - -int -acmod_set_senfh(acmod_t *acmod, FILE *logfh) -{ - if (acmod->senfh) - fclose(acmod->senfh); - acmod->senfh = logfh; - if (logfh == NULL) - return 0; - return acmod_write_senfh_header(acmod, logfh); -} - -int -acmod_set_mfcfh(acmod_t *acmod, FILE *logfh) -{ - int rv = 0; - - if (acmod->mfcfh) - fclose(acmod->mfcfh); - acmod->mfcfh = logfh; - fwrite(&rv, 4, 1, acmod->mfcfh); - return rv; -} - -int -acmod_set_rawfh(acmod_t *acmod, FILE *logfh) -{ - if (acmod->rawfh) - fclose(acmod->rawfh); - acmod->rawfh = logfh; - return 0; -} - -void -acmod_grow_feat_buf(acmod_t *acmod, int nfr) -{ - if (nfr > MAX_N_FRAMES) - E_FATAL("Decoder can not process more than %d frames at once, " - "requested %d\n", MAX_N_FRAMES, nfr); - - acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf, - acmod->n_feat_alloc, nfr); - acmod->framepos = ckd_realloc(acmod->framepos, - nfr * sizeof(*acmod->framepos)); - acmod->n_feat_alloc = nfr; -} - -int -acmod_set_grow(acmod_t *acmod, int grow_feat) -{ - int tmp = acmod->grow_feat; - acmod->grow_feat = grow_feat; - - /* Expand feat_buf to a reasonable size to start with. */ - if (grow_feat && acmod->n_feat_alloc < 128) - acmod_grow_feat_buf(acmod, 128); - - return tmp; -} - -int -acmod_start_utt(acmod_t *acmod) -{ - fe_start_utt(acmod->fe); - acmod->state = ACMOD_STARTED; - acmod->n_mfc_frame = 0; - acmod->n_feat_frame = 0; - acmod->mfc_outidx = 0; - acmod->feat_outidx = 0; - acmod->output_frame = 0; - acmod->senscr_frame = -1; - acmod->n_senone_active = 0; - acmod->mgau->frame_idx = 0; - acmod->rawdata_pos = 0; - - return 0; -} - -int -acmod_end_utt(acmod_t *acmod) -{ - int32 nfr = 0; - - acmod->state = ACMOD_ENDED; - if (acmod->n_mfc_frame < acmod->n_mfc_alloc) { - int inptr; - /* Where to start writing them (circular buffer) */ - inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc; - /* nfr is always either zero or one. */ - fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr); - acmod->n_mfc_frame += nfr; - - /* Process whatever's left, and any leadout or update stats if needed. */ - if (nfr) - nfr = acmod_process_mfcbuf(acmod); - else - feat_update_stats(acmod->fcb); - } - if (acmod->mfcfh) { - long outlen; - int32 rv; - outlen = (ftell(acmod->mfcfh) - 4) / 4; - if (!WORDS_BIGENDIAN) - SWAP_INT32(&outlen); - /* Try to seek and write */ - if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) { - fwrite(&outlen, 4, 1, acmod->mfcfh); - } - fclose(acmod->mfcfh); - acmod->mfcfh = NULL; - } - if (acmod->rawfh) { - fclose(acmod->rawfh); - acmod->rawfh = NULL; - } - - if (acmod->senfh) { - fclose(acmod->senfh); - acmod->senfh = NULL; - } - - return nfr; -} - -static int -acmod_log_mfc(acmod_t *acmod, - mfcc_t **cep, int n_frames) -{ - int i, n; - int32 *ptr = (int32 *)cep[0]; - - n = n_frames * feat_cepsize(acmod->fcb); - /* Swap bytes. */ - if (!WORDS_BIGENDIAN) { - for (i = 0; i < (n * sizeof(mfcc_t)); ++i) { - SWAP_INT32(ptr + i); - } - } - /* Write features. */ - if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) { - E_ERROR_SYSTEM("Failed to write %d values to log file", n); - } - - /* Swap them back. */ - if (!WORDS_BIGENDIAN) { - for (i = 0; i < (n * sizeof(mfcc_t)); ++i) { - SWAP_INT32(ptr + i); - } - } - return 0; -} - -static int -acmod_process_full_cep(acmod_t *acmod, - mfcc_t ***inout_cep, - int *inout_n_frames) -{ - int32 nfr; - - /* Write to log file. */ - if (acmod->mfcfh) - acmod_log_mfc(acmod, *inout_cep, *inout_n_frames); - - /* Resize feat_buf to fit. */ - if (acmod->n_feat_alloc < *inout_n_frames) { - - if (*inout_n_frames > MAX_N_FRAMES) - E_FATAL("Batch processing can not process more than %d frames " - "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames); - - feat_array_free(acmod->feat_buf); - acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames); - acmod->n_feat_alloc = *inout_n_frames; - acmod->n_feat_frame = 0; - acmod->feat_outidx = 0; - } - /* Make dynamic features. */ - nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames, - TRUE, TRUE, acmod->feat_buf); - acmod->n_feat_frame = nfr; - assert(acmod->n_feat_frame <= acmod->n_feat_alloc); - *inout_cep += *inout_n_frames; - *inout_n_frames = 0; - - return nfr; -} - -static int -acmod_process_full_raw(acmod_t *acmod, - int16 const **inout_raw, - size_t *inout_n_samps) -{ - int32 nfr, ntail; - mfcc_t **cepptr; - - /* Write to logging file if any. */ - if (*inout_n_samps + acmod->rawdata_pos < acmod->rawdata_size) { - memcpy(acmod->rawdata + acmod->rawdata_pos, *inout_raw, *inout_n_samps * sizeof(int16)); - acmod->rawdata_pos += *inout_n_samps; - } - if (acmod->rawfh) - fwrite(*inout_raw, sizeof(int16), *inout_n_samps, acmod->rawfh); - /* Resize mfc_buf to fit. */ - if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr, NULL) < 0) - return -1; - if (acmod->n_mfc_alloc < nfr + 1) { - ckd_free_2d(acmod->mfc_buf); - acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe), - sizeof(**acmod->mfc_buf)); - acmod->n_mfc_alloc = nfr + 1; - } - acmod->n_mfc_frame = 0; - acmod->mfc_outidx = 0; - fe_start_utt(acmod->fe); - if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, - acmod->mfc_buf, &nfr, NULL) < 0) - return -1; - fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail); - nfr += ntail; - - cepptr = acmod->mfc_buf; - nfr = acmod_process_full_cep(acmod, &cepptr, &nfr); - acmod->n_mfc_frame = 0; - return nfr; -} - -/** - * Process MFCCs that are in the internal buffer into features. - */ -static int32 -acmod_process_mfcbuf(acmod_t *acmod) -{ - mfcc_t **mfcptr; - int32 ncep; - - ncep = acmod->n_mfc_frame; - /* Also do this in two parts because of the circular mfc_buf. */ - if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) { - int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx; - int saved_state = acmod->state; - - /* Make sure we don't end the utterance here. */ - if (acmod->state == ACMOD_ENDED) - acmod->state = ACMOD_PROCESSING; - mfcptr = acmod->mfc_buf + acmod->mfc_outidx; - ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE); - /* It's possible that not all available frames were filled. */ - ncep -= ncep1; - acmod->n_mfc_frame -= ncep1; - acmod->mfc_outidx += ncep1; - acmod->mfc_outidx %= acmod->n_mfc_alloc; - /* Restore original state (could this really be the end) */ - acmod->state = saved_state; - } - mfcptr = acmod->mfc_buf + acmod->mfc_outidx; - ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE); - acmod->n_mfc_frame -= ncep; - acmod->mfc_outidx += ncep; - acmod->mfc_outidx %= acmod->n_mfc_alloc; - return ncep; -} - -int -acmod_process_raw(acmod_t *acmod, - int16 const **inout_raw, - size_t *inout_n_samps, - int full_utt) -{ - int32 ncep; - int32 out_frameidx; - int16 const *prev_audio_inptr; - - /* If this is a full utterance, process it all at once. */ - if (full_utt) - return acmod_process_full_raw(acmod, inout_raw, inout_n_samps); - - /* Append MFCCs to the end of any that are previously in there - * (in practice, there will probably be none) */ - if (inout_n_samps && *inout_n_samps) { - int inptr; - int32 processed_samples; - - prev_audio_inptr = *inout_raw; - /* Total number of frames available. */ - ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame; - /* Where to start writing them (circular buffer) */ - inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc; - - /* Write them in two (or more) parts if there is wraparound. */ - while (inptr + ncep > acmod->n_mfc_alloc) { - int32 ncep1 = acmod->n_mfc_alloc - inptr; - if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, - acmod->mfc_buf + inptr, &ncep1, &out_frameidx) < 0) - return -1; - - if (out_frameidx > 0) - acmod->utt_start_frame = out_frameidx; - - processed_samples = *inout_raw - prev_audio_inptr; - if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) { - memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16)); - acmod->rawdata_pos += processed_samples; - } - /* Write to logging file if any. */ - if (acmod->rawfh) { - fwrite(prev_audio_inptr, sizeof(int16), - processed_samples, - acmod->rawfh); - } - prev_audio_inptr = *inout_raw; - - /* ncep1 now contains the number of frames actually - * processed. This is a good thing, but it means we - * actually still might have some room left at the end of - * the buffer, hence the while loop. Unfortunately it - * also means that in the case where we are really - * actually done, we need to get out totally, hence the - * goto. */ - acmod->n_mfc_frame += ncep1; - ncep -= ncep1; - inptr += ncep1; - inptr %= acmod->n_mfc_alloc; - if (ncep1 == 0) - goto alldone; - } - - assert(inptr + ncep <= acmod->n_mfc_alloc); - if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, - acmod->mfc_buf + inptr, &ncep, &out_frameidx) < 0) - return -1; - - if (out_frameidx > 0) - acmod->utt_start_frame = out_frameidx; - - - processed_samples = *inout_raw - prev_audio_inptr; - if (processed_samples + acmod->rawdata_pos < acmod->rawdata_size) { - memcpy(acmod->rawdata + acmod->rawdata_pos, prev_audio_inptr, processed_samples * sizeof(int16)); - acmod->rawdata_pos += processed_samples; - } - if (acmod->rawfh) { - fwrite(prev_audio_inptr, sizeof(int16), - processed_samples, acmod->rawfh); - } - prev_audio_inptr = *inout_raw; - acmod->n_mfc_frame += ncep; - alldone: - ; - } - - /* Hand things off to acmod_process_cep. */ - return acmod_process_mfcbuf(acmod); -} - -int -acmod_process_cep(acmod_t *acmod, - mfcc_t ***inout_cep, - int *inout_n_frames, - int full_utt) -{ - int32 nfeat, ncep, inptr; - int orig_n_frames; - - /* If this is a full utterance, process it all at once. */ - if (full_utt) - return acmod_process_full_cep(acmod, inout_cep, inout_n_frames); - - /* Write to log file. */ - if (acmod->mfcfh) - acmod_log_mfc(acmod, *inout_cep, *inout_n_frames); - - /* Maximum number of frames we're going to generate. */ - orig_n_frames = ncep = nfeat = *inout_n_frames; - - /* FIXME: This behaviour isn't guaranteed... */ - if (acmod->state == ACMOD_ENDED) - nfeat += feat_window_size(acmod->fcb); - else if (acmod->state == ACMOD_STARTED) - nfeat -= feat_window_size(acmod->fcb); - - /* Clamp number of features to fit available space. */ - if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) { - /* Grow it as needed - we have to grow it at the end of an - * utterance because we can't return a short read there. */ - if (acmod->grow_feat || acmod->state == ACMOD_ENDED) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat); - else - ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame)); - } - - /* Where to start writing in the feature buffer. */ - if (acmod->grow_feat) { - /* Grow to avoid wraparound if grow_feat == TRUE. */ - inptr = acmod->feat_outidx + acmod->n_feat_frame; - while (inptr + nfeat >= acmod->n_feat_alloc) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); - } - else { - inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc; - } - - - /* FIXME: we can't split the last frame drop properly to be on the bounary, - * so just return - */ - if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) { - *inout_n_frames -= ncep; - *inout_cep += ncep; - return 0; - } - - /* Write them in two parts if there is wraparound. */ - if (inptr + nfeat > acmod->n_feat_alloc) { - int32 ncep1 = acmod->n_feat_alloc - inptr; - - /* Make sure we don't end the utterance here. */ - nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, - &ncep1, - (acmod->state == ACMOD_STARTED), - FALSE, - acmod->feat_buf + inptr); - if (nfeat < 0) - return -1; - /* Move the output feature pointer forward. */ - acmod->n_feat_frame += nfeat; - assert(acmod->n_feat_frame <= acmod->n_feat_alloc); - inptr += nfeat; - inptr %= acmod->n_feat_alloc; - /* Move the input feature pointers forward. */ - *inout_n_frames -= ncep1; - *inout_cep += ncep1; - ncep -= ncep1; - } - - nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, - &ncep, - (acmod->state == ACMOD_STARTED), - (acmod->state == ACMOD_ENDED), - acmod->feat_buf + inptr); - if (nfeat < 0) - return -1; - acmod->n_feat_frame += nfeat; - assert(acmod->n_feat_frame <= acmod->n_feat_alloc); - /* Move the input feature pointers forward. */ - *inout_n_frames -= ncep; - *inout_cep += ncep; - if (acmod->state == ACMOD_STARTED) - acmod->state = ACMOD_PROCESSING; - - return orig_n_frames - *inout_n_frames; -} - -int -acmod_process_feat(acmod_t *acmod, - mfcc_t **feat) -{ - int i, inptr; - - if (acmod->n_feat_frame == acmod->n_feat_alloc) { - if (acmod->grow_feat) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); - else - return 0; - } - - if (acmod->grow_feat) { - /* Grow to avoid wraparound if grow_feat == TRUE. */ - inptr = acmod->feat_outidx + acmod->n_feat_frame; - while (inptr + 1 >= acmod->n_feat_alloc) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); - } - else { - inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc; - } - for (i = 0; i < feat_dimension1(acmod->fcb); ++i) - memcpy(acmod->feat_buf[inptr][i], - feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat)); - ++acmod->n_feat_frame; - assert(acmod->n_feat_frame <= acmod->n_feat_alloc); - - return 1; -} - -static int -acmod_read_senfh_header(acmod_t *acmod) -{ - char **name, **val; - int32 swap; - int i; - - if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0) - goto error_out; - for (i = 0; name[i] != NULL; ++i) { - if (!strcmp(name[i], "n_sen")) { - if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) { - E_ERROR("Number of senones in senone file (%d) does not " - "match mdef (%d)\n", atoi(val[i]), - bin_mdef_n_sen(acmod->mdef)); - goto error_out; - } - } - - if (!strcmp(name[i], "logbase")) { - if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) { - E_ERROR("Logbase in senone file (%f) does not match acmod " - "(%f)\n", atof_c(val[i]), - logmath_get_base(acmod->lmath)); - goto error_out; - } - } - } - acmod->insen_swap = swap; - bio_hdrarg_free(name, val); - return 0; -error_out: - bio_hdrarg_free(name, val); - return -1; -} - -int -acmod_set_insenfh(acmod_t *acmod, FILE *senfh) -{ - acmod->insenfh = senfh; - if (senfh == NULL) { - acmod->n_feat_frame = 0; - acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen"); - return 0; - } - acmod->compallsen = TRUE; - return acmod_read_senfh_header(acmod); -} - -int -acmod_rewind(acmod_t *acmod) -{ - /* If the feature buffer is circular, this is not possible. */ - if (acmod->output_frame > acmod->n_feat_alloc) { - E_ERROR("Circular feature buffer cannot be rewound (output frame %d, " - "alloc %d)\n", acmod->output_frame, acmod->n_feat_alloc); - return -1; - } - - /* Frames consumed + frames available */ - acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame; - - /* Reset output pointers. */ - acmod->feat_outidx = 0; - acmod->output_frame = 0; - acmod->senscr_frame = -1; - acmod->mgau->frame_idx = 0; - - return 0; -} - -int -acmod_advance(acmod_t *acmod) -{ - /* Advance the output pointers. */ - if (++acmod->feat_outidx == acmod->n_feat_alloc) - acmod->feat_outidx = 0; - --acmod->n_feat_frame; - ++acmod->mgau->frame_idx; - - return ++acmod->output_frame; -} - -int -acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, - int16 const *senscr, FILE *senfh) -{ - int16 n_active2; - - /* Uncompressed frame format: - * - * (2 bytes) n_active: Number of active senones - * If all senones active: - * (n_active * 2 bytes) scores of active senones - * - * Otherwise: - * (2 bytes) n_active: Number of active senones - * (n_active bytes) deltas to active senones - * (n_active * 2 bytes) scores of active senones - */ - n_active2 = n_active; - if (fwrite(&n_active2, 2, 1, senfh) != 1) - goto error_out; - if (n_active == bin_mdef_n_sen(acmod->mdef)) { - if (fwrite(senscr, 2, n_active, senfh) != n_active) - goto error_out; - } - else { - int i, n; - if (fwrite(active, 1, n_active, senfh) != n_active) - goto error_out; - for (i = n = 0; i < n_active; ++i) { - n += active[i]; - if (fwrite(senscr + n, 2, 1, senfh) != 1) - goto error_out; - } - } - return 0; -error_out: - E_ERROR_SYSTEM("Failed to write frame to senone file"); - return -1; -} - -/** - * Internal version, used for reading previous frames in acmod_score() - */ -static int -acmod_read_scores_internal(acmod_t *acmod) -{ - FILE *senfh = acmod->insenfh; - int16 n_active; - size_t rv; - - if (acmod->n_feat_frame == acmod->n_feat_alloc) { - if (acmod->grow_feat) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); - else - return 0; - } - - if (senfh == NULL) - return -1; - - if ((rv = fread(&n_active, 2, 1, senfh)) != 1) - goto error_out; - - acmod->n_senone_active = n_active; - if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) { - if ((rv = fread(acmod->senone_scores, 2, - acmod->n_senone_active, senfh)) != acmod->n_senone_active) - goto error_out; - } - else { - int i, n; - - if ((rv = fread(acmod->senone_active, 1, - acmod->n_senone_active, senfh)) != acmod->n_senone_active) - goto error_out; - - for (i = 0, n = 0; i < acmod->n_senone_active; ++i) { - int j, sen = n + acmod->senone_active[i]; - for (j = n + 1; j < sen; ++j) - acmod->senone_scores[j] = SENSCR_DUMMY; - - if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) != 1) - goto error_out; - - n = sen; - } - - n++; - while (n < bin_mdef_n_sen(acmod->mdef)) - acmod->senone_scores[n++] = SENSCR_DUMMY; - } - return 1; - -error_out: - if (ferror(senfh)) { - E_ERROR_SYSTEM("Failed to read frame from senone file"); - return -1; - } - return 0; -} - -int -acmod_read_scores(acmod_t *acmod) -{ - int inptr, rv; - - if (acmod->grow_feat) { - /* Grow to avoid wraparound if grow_feat == TRUE. */ - inptr = acmod->feat_outidx + acmod->n_feat_frame; - /* Has to be +1, otherwise, next time acmod_advance() is - * called, this will wrap around. */ - while (inptr + 1 >= acmod->n_feat_alloc) - acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); - } - else { - inptr = (acmod->feat_outidx + acmod->n_feat_frame) % - acmod->n_feat_alloc; - } - - if ((rv = acmod_read_scores_internal(acmod)) != 1) - return rv; - - /* Set acmod->senscr_frame appropriately so that these scores - get reused below in acmod_score(). */ - acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame; - - E_DEBUG(1,("Frame %d has %d active states\n", - acmod->senscr_frame, acmod->n_senone_active)); - - /* Increment the "feature frame counter" and record the file - * position for the relevant frame in the (possibly circular) - * buffer. */ - ++acmod->n_feat_frame; - acmod->framepos[inptr] = ftell(acmod->insenfh); - - return 1; -} - -static int -calc_frame_idx(acmod_t *acmod, int *inout_frame_idx) -{ - int frame_idx; - - /* Calculate the absolute frame index to be scored. */ - if (inout_frame_idx == NULL) - frame_idx = acmod->output_frame; - else if (*inout_frame_idx < 0) - frame_idx = acmod->output_frame + 1 + *inout_frame_idx; - else - frame_idx = *inout_frame_idx; - - return frame_idx; -} - -static int -calc_feat_idx(acmod_t *acmod, int frame_idx) -{ - int n_backfr, feat_idx; - - n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame; - if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) { - E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), " - "cannot score\n", frame_idx, acmod->n_feat_frame, - acmod->n_feat_alloc, acmod->output_frame - frame_idx, - n_backfr); - return -1; - } - - /* Get the index in feat_buf/framepos of the frame to be scored. */ - feat_idx = (acmod->feat_outidx + frame_idx - acmod->output_frame) % - acmod->n_feat_alloc; - if (feat_idx < 0) - feat_idx += acmod->n_feat_alloc; - - return feat_idx; -} - -mfcc_t ** -acmod_get_frame(acmod_t *acmod, int *inout_frame_idx) -{ - int frame_idx, feat_idx; - - /* Calculate the absolute frame index requested. */ - frame_idx = calc_frame_idx(acmod, inout_frame_idx); - - /* Calculate position of requested frame in circular buffer. */ - if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0) - return NULL; - - if (inout_frame_idx) - *inout_frame_idx = frame_idx; - - return acmod->feat_buf[feat_idx]; -} - -int16 const * -acmod_score(acmod_t *acmod, int *inout_frame_idx) -{ - int frame_idx, feat_idx; - - /* Calculate the absolute frame index to be scored. */ - frame_idx = calc_frame_idx(acmod, inout_frame_idx); - - /* If all senones are being computed, or we are using a senone file, - then we can reuse existing scores. */ - if ((acmod->compallsen || acmod->insenfh) - && frame_idx == acmod->senscr_frame) { - if (inout_frame_idx) - *inout_frame_idx = frame_idx; - return acmod->senone_scores; - } - - /* Calculate position of requested frame in circular buffer. */ - if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0) - return NULL; - - /* - * If there is an input senone file locate the appropriate frame and read - * it. - */ - if (acmod->insenfh) { - fseek(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET); - if (acmod_read_scores_internal(acmod) < 0) - return NULL; - } - else { - /* Build active senone list. */ - acmod_flags2list(acmod); - - /* Generate scores for the next available frame */ - ps_mgau_frame_eval(acmod->mgau, - acmod->senone_scores, - acmod->senone_active, - acmod->n_senone_active, - acmod->feat_buf[feat_idx], - frame_idx, - acmod->compallsen); - } - - if (inout_frame_idx) - *inout_frame_idx = frame_idx; - acmod->senscr_frame = frame_idx; - - /* Dump scores to the senone dump file if one exists. */ - if (acmod->senfh) { - if (acmod_write_scores(acmod, acmod->n_senone_active, - acmod->senone_active, - acmod->senone_scores, - acmod->senfh) < 0) - return NULL; - E_DEBUG(1,("Frame %d has %d active states\n", frame_idx, - acmod->n_senone_active)); - } - - return acmod->senone_scores; -} - -int -acmod_best_score(acmod_t *acmod, int *out_best_senid) -{ - int i, best; - - best = SENSCR_DUMMY; - if (acmod->compallsen) { - for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) { - if (acmod->senone_scores[i] < best) { - best = acmod->senone_scores[i]; - *out_best_senid = i; - } - } - } - else { - int16 *senscr; - senscr = acmod->senone_scores; - for (i = 0; i < acmod->n_senone_active; ++i) { - senscr += acmod->senone_active[i]; - if (*senscr < best) { - best = *senscr; - *out_best_senid = i; - } - } - } - return best; -} - - -void -acmod_clear_active(acmod_t *acmod) -{ - if (acmod->compallsen) - return; - bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef)); - acmod->n_senone_active = 0; -} - -#define MPX_BITVEC_SET(a,h,i) \ - if (hmm_mpx_ssid(h,i) != BAD_SSID) \ - bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i)) -#define NONMPX_BITVEC_SET(a,h,i) \ - bitvec_set((a)->senone_active_vec, \ - hmm_nonmpx_senid(h,i)) - -void -acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm) -{ - int i; - - if (acmod->compallsen) - return; - if (hmm_is_mpx(hmm)) { - switch (hmm_n_emit_state(hmm)) { - case 5: - MPX_BITVEC_SET(acmod, hmm, 4); - MPX_BITVEC_SET(acmod, hmm, 3); - case 3: - MPX_BITVEC_SET(acmod, hmm, 2); - MPX_BITVEC_SET(acmod, hmm, 1); - MPX_BITVEC_SET(acmod, hmm, 0); - break; - default: - for (i = 0; i < hmm_n_emit_state(hmm); ++i) { - MPX_BITVEC_SET(acmod, hmm, i); - } - } - } - else { - switch (hmm_n_emit_state(hmm)) { - case 5: - NONMPX_BITVEC_SET(acmod, hmm, 4); - NONMPX_BITVEC_SET(acmod, hmm, 3); - case 3: - NONMPX_BITVEC_SET(acmod, hmm, 2); - NONMPX_BITVEC_SET(acmod, hmm, 1); - NONMPX_BITVEC_SET(acmod, hmm, 0); - break; - default: - for (i = 0; i < hmm_n_emit_state(hmm); ++i) { - NONMPX_BITVEC_SET(acmod, hmm, i); - } - } - } -} - -int32 -acmod_flags2list(acmod_t *acmod) -{ - int32 w, l, n, b, total_dists, total_words, extra_bits; - bitvec_t *flagptr; - - total_dists = bin_mdef_n_sen(acmod->mdef); - if (acmod->compallsen) { - acmod->n_senone_active = total_dists; - return total_dists; - } - total_words = total_dists / BITVEC_BITS; - extra_bits = total_dists % BITVEC_BITS; - w = n = l = 0; - for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) { - if (*flagptr == 0) - continue; - for (b = 0; b < BITVEC_BITS; ++b) { - if (*flagptr & (1UL << b)) { - int32 sen = w * BITVEC_BITS + b; - int32 delta = sen - l; - /* Handle excessive deltas "lossily" by adding a few - extra senones to bridge the gap. */ - while (delta > 255) { - acmod->senone_active[n++] = 255; - delta -= 255; - } - acmod->senone_active[n++] = delta; - l = sen; - } - } - } - - for (b = 0; b < extra_bits; ++b) { - if (*flagptr & (1UL << b)) { - int32 sen = w * BITVEC_BITS + b; - int32 delta = sen - l; - /* Handle excessive deltas "lossily" by adding a few - extra senones to bridge the gap. */ - while (delta > 255) { - acmod->senone_active[n++] = 255; - delta -= 255; - } - acmod->senone_active[n++] = delta; - l = sen; - } - } - - acmod->n_senone_active = n; - E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n", - acmod->n_senone_active, acmod->output_frame)); - return n; -} - -int32 -acmod_stream_offset(acmod_t *acmod) -{ - return acmod->utt_start_frame; -} - -void -acmod_start_stream(acmod_t *acmod) -{ - fe_start_stream(acmod->fe); - acmod->utt_start_frame = 0; -} - -void -acmod_set_rawdata_size(acmod_t *acmod, int32 size) -{ - assert(size >= 0); - acmod->rawdata_size = size; - if (acmod->rawdata_size > 0) { - ckd_free(acmod->rawdata); - acmod->rawdata = ckd_calloc(size, sizeof(int16)); - } -} - -void -acmod_get_rawdata(acmod_t *acmod, int16 **buffer, int32 *size) -{ - if (buffer) { - *buffer = acmod->rawdata; - } - if (size) { - *size = acmod->rawdata_pos; - } -} - |