summaryrefslogtreecommitdiff
path: root/media/pocketsphinx/src/ngram_search.c
diff options
context:
space:
mode:
Diffstat (limited to 'media/pocketsphinx/src/ngram_search.c')
-rw-r--r--media/pocketsphinx/src/ngram_search.c1409
1 files changed, 0 insertions, 1409 deletions
diff --git a/media/pocketsphinx/src/ngram_search.c b/media/pocketsphinx/src/ngram_search.c
deleted file mode 100644
index 47e488c3af..0000000000
--- a/media/pocketsphinx/src/ngram_search.c
+++ /dev/null
@@ -1,1409 +0,0 @@
-/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
-/* ====================================================================
- * Copyright (c) 2008 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
-
-/**
- * @file ngram_search.c N-Gram based multi-pass search ("FBS")
- */
-
-/* System headers. */
-#include <string.h>
-#include <assert.h>
-
-/* SphinxBase headers. */
-#include <sphinxbase/ckd_alloc.h>
-#include <sphinxbase/listelem_alloc.h>
-#include <sphinxbase/err.h>
-
-/* Local headers. */
-#include "pocketsphinx_internal.h"
-#include "ps_lattice_internal.h"
-#include "ngram_search.h"
-#include "ngram_search_fwdtree.h"
-#include "ngram_search_fwdflat.h"
-
-static int ngram_search_start(ps_search_t *search);
-static int ngram_search_step(ps_search_t *search, int frame_idx);
-static int ngram_search_finish(ps_search_t *search);
-static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
-static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final);
-static int32 ngram_search_prob(ps_search_t *search);
-static ps_seg_t *ngram_search_seg_iter(ps_search_t *search, int32 *out_score);
-
-static ps_searchfuncs_t ngram_funcs = {
- /* name: */ "ngram",
- /* start: */ ngram_search_start,
- /* step: */ ngram_search_step,
- /* finish: */ ngram_search_finish,
- /* reinit: */ ngram_search_reinit,
- /* free: */ ngram_search_free,
- /* lattice: */ ngram_search_lattice,
- /* hyp: */ ngram_search_hyp,
- /* prob: */ ngram_search_prob,
- /* seg_iter: */ ngram_search_seg_iter,
-};
-
-static ngram_model_t *default_lm;
-
-static void
-ngram_search_update_widmap(ngram_search_t *ngs)
-{
- char const **words;
- int32 i, n_words;
-
- /* It's okay to include fillers since they won't be in the LM */
- n_words = ps_search_n_words(ngs);
- words = (char const**)ckd_calloc(n_words, sizeof(*words));
- /* This will include alternates, again, that's okay since they aren't in the LM */
- for (i = 0; i < n_words; ++i)
- words[i] = dict_wordstr(ps_search_dict(ngs), i);
- ngram_model_set_map_words(ngs->lmset, words, n_words);
- ckd_free(words);
-}
-
-static void
-ngram_search_calc_beams(ngram_search_t *ngs)
-{
- cmd_ln_t *config;
- acmod_t *acmod;
-
- config = ps_search_config(ngs);
- acmod = ps_search_acmod(ngs);
-
- /* Log beam widths. */
- ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT;
- ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT;
- ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT;
- ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT;
- ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT;
- ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT;
- ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT;
-
- /* Absolute pruning parameters. */
- ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf");
- ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf");
-
- /* Various penalties which may or may not be useful. */
- ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT;
- ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT;
- ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT;
- ngs->silpen = ngs->pip
- + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT);
- ngs->fillpen = ngs->pip
- + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT);
-
- /* Language weight ratios for fwdflat and bestpath search. */
- ngs->fwdflat_fwdtree_lw_ratio =
- cmd_ln_float32_r(config, "-fwdflatlw")
- / cmd_ln_float32_r(config, "-lw");
- ngs->bestpath_fwdtree_lw_ratio =
- cmd_ln_float32_r(config, "-bestpathlw")
- / cmd_ln_float32_r(config, "-lw");
-
- /* Acoustic score scale for posterior probabilities. */
- ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale");
-}
-
-ps_search_t *
-ngram_search_init(ngram_model_t *lm,
- cmd_ln_t *config,
- acmod_t *acmod,
- dict_t *dict,
- dict2pid_t *d2p)
-{
- ngram_search_t *ngs;
- static char *lmname = "default";
-
- /* Make the acmod's feature buffer growable if we are doing two-pass
- * search. */
- acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") &&
- cmd_ln_boolean_r(config, "-fwdtree"));
-
- ngs = ckd_calloc(1, sizeof(*ngs));
- ps_search_init(&ngs->base, &ngram_funcs, config, acmod, dict, d2p);
- ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
- acmod->tmat->tp, NULL, acmod->mdef->sseq);
- if (ngs->hmmctx == NULL) {
- ps_search_free(ps_search_base(ngs));
- return NULL;
- }
- ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t));
- ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t));
- ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t));
-
- /* Calculate various beam widths and such. */
- ngram_search_calc_beams(ngs);
-
- /* Allocate a billion different tables for stuff. */
- ngs->word_chan = ckd_calloc(dict_size(dict),
- sizeof(*ngs->word_chan));
- ngs->word_lat_idx = ckd_calloc(dict_size(dict),
- sizeof(*ngs->word_lat_idx));
- ngs->word_active = bitvec_alloc(dict_size(dict));
- ngs->last_ltrans = ckd_calloc(dict_size(dict),
- sizeof(*ngs->last_ltrans));
-
- /* FIXME: All these structures need to be made dynamic with
- * garbage collection. */
- ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize");
- ngs->bp_table = ckd_calloc(ngs->bp_table_size,
- sizeof(*ngs->bp_table));
- /* FIXME: This thing is frickin' huge. */
- ngs->bscore_stack_size = ngs->bp_table_size * 20;
- ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
- sizeof(*ngs->bscore_stack));
- ngs->n_frame_alloc = 256;
- ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1,
- sizeof(*ngs->bp_table_idx));
- ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
-
- /* Allocate active word list array */
- ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict),
- sizeof(**ngs->active_word_list));
-
- ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
- if (!ngs->lmset)
- goto error_out;
-
- if (ngram_wid(ngs->lmset, S3_FINISH_WORD) ==
- ngram_unknown_wid(ngs->lmset))
- {
- E_ERROR("Language model/set does not contain </s>, "
- "recognition will fail\n");
- goto error_out;
- }
-
- /* Create word mappings. */
- ngram_search_update_widmap(ngs);
-
- /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */
- if (cmd_ln_boolean_r(config, "-fwdtree")) {
- ngram_fwdtree_init(ngs);
- ngs->fwdtree = TRUE;
- ngs->fwdtree_perf.name = "fwdtree";
- ptmr_init(&ngs->fwdtree_perf);
- }
- if (cmd_ln_boolean_r(config, "-fwdflat")) {
- ngram_fwdflat_init(ngs);
- ngs->fwdflat = TRUE;
- ngs->fwdflat_perf.name = "fwdflat";
- ptmr_init(&ngs->fwdflat_perf);
- }
- if (cmd_ln_boolean_r(config, "-bestpath")) {
- ngs->bestpath = TRUE;
- ngs->bestpath_perf.name = "bestpath";
- ptmr_init(&ngs->bestpath_perf);
- }
-
- return (ps_search_t *)ngs;
-
-error_out:
- ngram_search_free((ps_search_t *)ngs);
- return NULL;
-}
-
-static int
-ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
- int old_n_words;
- int rv = 0;
-
- /* Update the number of words. */
- old_n_words = search->n_words;
- if (old_n_words != dict_size(dict)) {
- search->n_words = dict_size(dict);
- /* Reallocate these temporary arrays. */
- ckd_free(ngs->word_lat_idx);
- ckd_free(ngs->word_active);
- ckd_free(ngs->last_ltrans);
- ckd_free_2d(ngs->active_word_list);
- ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx));
- ngs->word_active = bitvec_alloc(search->n_words);
- ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans));
- ngs->active_word_list
- = ckd_calloc_2d(2, search->n_words,
- sizeof(**ngs->active_word_list));
- }
-
- /* Free old dict2pid, dict */
- ps_search_base_reinit(search, dict, d2p);
-
- if (ngs->lmset == NULL)
- return 0;
-
- /* Update beam widths. */
- ngram_search_calc_beams(ngs);
-
- /* Update word mappings. */
- ngram_search_update_widmap(ngs);
-
- /* Now rebuild lextrees. */
- if (ngs->fwdtree) {
- if ((rv = ngram_fwdtree_reinit(ngs)) < 0)
- return rv;
- }
- if (ngs->fwdflat) {
- if ((rv = ngram_fwdflat_reinit(ngs)) < 0)
- return rv;
- }
-
- return rv;
-}
-
-void
-ngram_search_free(ps_search_t *search)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- ps_search_deinit(search);
- if (ngs->fwdtree)
- ngram_fwdtree_deinit(ngs);
- if (ngs->fwdflat)
- ngram_fwdflat_deinit(ngs);
- if (ngs->bestpath) {
- double n_speech = (double)ngs->n_tot_frame
- / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
-
- E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n",
- ngs->bestpath_perf.t_tot_cpu,
- ngs->bestpath_perf.t_tot_cpu / n_speech);
- E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n",
- ngs->bestpath_perf.t_tot_elapsed,
- ngs->bestpath_perf.t_tot_elapsed / n_speech);
- }
-
- hmm_context_free(ngs->hmmctx);
- listelem_alloc_free(ngs->chan_alloc);
- listelem_alloc_free(ngs->root_chan_alloc);
- listelem_alloc_free(ngs->latnode_alloc);
- ngram_model_free(ngs->lmset);
-
- ckd_free(ngs->word_chan);
- ckd_free(ngs->word_lat_idx);
- bitvec_free(ngs->word_active);
- ckd_free(ngs->bp_table);
- ckd_free(ngs->bscore_stack);
- if (ngs->bp_table_idx != NULL)
- ckd_free(ngs->bp_table_idx - 1);
- ckd_free_2d(ngs->active_word_list);
- ckd_free(ngs->last_ltrans);
- ckd_free(ngs);
-}
-
-int
-ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
-{
- if (frame_idx >= ngs->n_frame_alloc) {
- ngs->n_frame_alloc *= 2;
- ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
- (ngs->n_frame_alloc + 1)
- * sizeof(*ngs->bp_table_idx));
- if (ngs->frm_wordlist) {
- ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist,
- ngs->n_frame_alloc
- * sizeof(*ngs->frm_wordlist));
- }
- ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */
- }
- ngs->bp_table_idx[frame_idx] = ngs->bpidx;
- return ngs->bpidx;
-}
-
-static void
-set_real_wid(ngram_search_t *ngs, int32 bp)
-{
- bptbl_t *ent, *prev;
-
- assert(bp != NO_BP);
- ent = ngs->bp_table + bp;
- if (ent->bp == NO_BP)
- prev = NULL;
- else
- prev = ngs->bp_table + ent->bp;
-
- /* Propagate lm state for fillers, rotate it for words. */
- if (dict_filler_word(ps_search_dict(ngs), ent->wid)) {
- if (prev != NULL) {
- ent->real_wid = prev->real_wid;
- ent->prev_real_wid = prev->prev_real_wid;
- }
- else {
- ent->real_wid = dict_basewid(ps_search_dict(ngs),
- ent->wid);
- ent->prev_real_wid = BAD_S3WID;
- }
- }
- else {
- ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid);
- if (prev != NULL)
- ent->prev_real_wid = prev->real_wid;
- else
- ent->prev_real_wid = BAD_S3WID;
- }
-}
-
-#define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */
-
-void
-ngram_search_save_bp(ngram_search_t *ngs, int frame_idx,
- int32 w, int32 score, int32 path, int32 rc)
-{
- int32 bp;
-
- /* Look for an existing exit for this word in this frame. The
- * only reason one would exist is from a different right context
- * triphone, but of course that happens quite frequently. */
- bp = ngs->word_lat_idx[w];
- if (bp != NO_BP) {
-
- if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) {
- E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
- frame_idx - ngs->bp_table[path].frame);
- }
-
- /* Keep only the best scoring one, we will reconstruct the
- * others from the right context scores - usually the history
- * is not lost. */
- if (ngs->bp_table[bp].score WORSE_THAN score) {
- assert(path != bp); /* Pathological. */
- if (ngs->bp_table[bp].bp != path) {
- int32 bplh[2], newlh[2];
- /* But, sometimes, the history *is* lost. If we wanted to
- * do exact language model scoring we'd have to preserve
- * these alternate histories. */
- E_DEBUG(2,("Updating path history %d => %d frame %d\n",
- ngs->bp_table[bp].bp, path, frame_idx));
- bplh[0] = ngs->bp_table[bp].bp == -1
- ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid;
- bplh[1] = ngs->bp_table[bp].bp == -1
- ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid;
- newlh[0] = path == -1
- ? -1 : ngs->bp_table[path].prev_real_wid;
- newlh[1] = path == -1
- ? -1 : ngs->bp_table[path].real_wid;
- /* Actually it's worth checking how often the actual
- * language model state changes. */
- if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
- /* It's fairly rare that the actual language model
- * state changes, but it does happen some
- * times. */
- E_DEBUG(1, ("Updating language model state %s,%s => %s,%s frame %d\n",
- dict_wordstr(ps_search_dict(ngs), bplh[0]),
- dict_wordstr(ps_search_dict(ngs), bplh[1]),
- dict_wordstr(ps_search_dict(ngs), newlh[0]),
- dict_wordstr(ps_search_dict(ngs), newlh[1]),
- frame_idx));
- set_real_wid(ngs, bp);
- }
- ngs->bp_table[bp].bp = path;
- }
- ngs->bp_table[bp].score = score;
- }
- /* But do keep track of scores for all right contexts, since
- * we need them to determine the starting path scores for any
- * successors of this word exit. */
- if (ngs->bp_table[bp].s_idx != -1)
- ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score;
- }
- else {
- int32 i, rcsize;
- bptbl_t *be;
-
- /* This might happen if recognition fails. */
- if (ngs->bpidx == NO_BP) {
- E_ERROR("No entries in backpointer table!");
- return;
- }
-
- /* Expand the backpointer tables if necessary. */
- if (ngs->bpidx >= ngs->bp_table_size) {
- ngs->bp_table_size *= 2;
- ngs->bp_table = ckd_realloc(ngs->bp_table,
- ngs->bp_table_size
- * sizeof(*ngs->bp_table));
- E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size);
- }
- if (ngs->bss_head >= ngs->bscore_stack_size
- - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
- ngs->bscore_stack_size *= 2;
- ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
- ngs->bscore_stack_size
- * sizeof(*ngs->bscore_stack));
- E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size);
- }
-
- ngs->word_lat_idx[w] = ngs->bpidx;
- be = &(ngs->bp_table[ngs->bpidx]);
- be->wid = w;
- be->frame = frame_idx;
- be->bp = path;
- be->score = score;
- be->s_idx = ngs->bss_head;
- be->valid = TRUE;
- assert(path != ngs->bpidx);
-
- /* DICT2PID */
- /* Get diphone ID for final phone and number of ssids corresponding to it. */
- be->last_phone = dict_last_phone(ps_search_dict(ngs),w);
- if (dict_is_single_phone(ps_search_dict(ngs), w)) {
- be->last2_phone = -1;
- be->s_idx = -1;
- rcsize = 0;
- }
- else {
- be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
- rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
- be->last_phone, be->last2_phone)->n_ssid;
- }
- /* Allocate some space on the bscore_stack for all of these triphones. */
- for (i = 0; i < rcsize; ++i)
- ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE;
- if (rcsize)
- ngs->bscore_stack[ngs->bss_head + rc] = score;
- set_real_wid(ngs, ngs->bpidx);
-
- ngs->bpidx++;
- ngs->bss_head += rcsize;
- }
-}
-
-int
-ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score, int32 *out_is_final)
-{
- /* End of backpointers for this frame. */
- int end_bpidx;
- int best_exit, bp;
- int32 best_score;
-
- /* No hypothesis means no exit node! */
- if (ngs->n_frame == 0)
- return NO_BP;
-
- if (frame_idx == -1 || frame_idx >= ngs->n_frame)
- frame_idx = ngs->n_frame - 1;
- end_bpidx = ngs->bp_table_idx[frame_idx];
-
- best_score = WORST_SCORE;
- best_exit = NO_BP;
-
- /* Scan back to find a frame with some backpointers in it. */
- while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
- --frame_idx;
- /* This is NOT an error, it just means there is no hypothesis yet. */
- if (frame_idx < 0)
- return NO_BP;
-
- /* Now find the entry for </s> OR the best scoring entry. */
- assert(end_bpidx < ngs->bp_table_size);
- for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
- if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)
- || ngs->bp_table[bp].score BETTER_THAN best_score) {
- best_score = ngs->bp_table[bp].score;
- best_exit = bp;
- }
- if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs))
- break;
- }
-
- if (out_best_score) {
- *out_best_score = best_score;
- }
- if (out_is_final) {
- *out_is_final = (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs));
- }
- return best_exit;
-}
-
-char const *
-ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
-{
- ps_search_t *base = ps_search_base(ngs);
- char *c;
- size_t len;
- int bp;
-
- if (bpidx == NO_BP)
- return NULL;
-
- bp = bpidx;
- len = 0;
- while (bp != NO_BP) {
- bptbl_t *be = &ngs->bp_table[bp];
- bp = be->bp;
- if (dict_real_word(ps_search_dict(ngs), be->wid))
- len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1;
- }
-
- ckd_free(base->hyp_str);
- if (len == 0) {
- base->hyp_str = NULL;
- return base->hyp_str;
- }
- base->hyp_str = ckd_calloc(1, len);
-
- bp = bpidx;
- c = base->hyp_str + len - 1;
- while (bp != NO_BP) {
- bptbl_t *be = &ngs->bp_table[bp];
- size_t len;
-
- bp = be->bp;
- if (dict_real_word(ps_search_dict(ngs), be->wid)) {
- len = strlen(dict_basestr(ps_search_dict(ngs), be->wid));
- c -= len;
- memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len);
- if (c > base->hyp_str) {
- --c;
- *c = ' ';
- }
- }
- }
-
- return base->hyp_str;
-}
-
-void
-ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
-{
- chan_t *hmm, *thmm;
- xwdssid_t *rssid;
- int32 i, tmatid, ciphone;
-
- /* DICT2PID */
- /* Get pointer to array of triphones for final diphone. */
- assert(!dict_is_single_phone(ps_search_dict(ngs), w));
- ciphone = dict_last_phone(ps_search_dict(ngs),w);
- rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
- ciphone,
- dict_second_last_phone(ps_search_dict(ngs),w));
- tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
- hmm = ngs->word_chan[w];
- if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) {
- hmm = listelem_malloc(ngs->chan_alloc);
- hmm->next = ngs->word_chan[w];
- ngs->word_chan[w] = hmm;
-
- hmm->info.rc_id = 0;
- hmm->ciphone = ciphone;
- hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid);
- E_DEBUG(3,("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
- rssid->ssid[0], hmm->ciphone,
- dict_second_last_phone(ps_search_dict(ngs),w),
- dict_wordstr(ps_search_dict(ngs),w)));
- }
- for (i = 1; i < rssid->n_ssid; ++i) {
- if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) {
- thmm = listelem_malloc(ngs->chan_alloc);
- thmm->next = hmm->next;
- hmm->next = thmm;
- hmm = thmm;
-
- hmm->info.rc_id = i;
- hmm->ciphone = ciphone;
- hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid);
- E_DEBUG(3,("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
- i, rssid->ssid[i], hmm->ciphone,
- dict_second_last_phone(ps_search_dict(ngs),w),
- dict_wordstr(ps_search_dict(ngs),w)));
- }
- else
- hmm = hmm->next;
- }
-}
-
-void
-ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
-{
- chan_t *hmm, *thmm;
-
- for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) {
- thmm = hmm->next;
- hmm_deinit(&hmm->hmm);
- listelem_free(ngs->chan_alloc, hmm);
- }
- ngs->word_chan[w] = NULL;
-}
-
-int32
-ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
-{
- /* DICT2PID */
- /* Get the mapping from right context phone ID to index in the
- * right context table and the bscore_stack. */
- if (pbe->last2_phone == -1) {
- /* No right context for single phone predecessor words. */
- return pbe->score;
- }
- else {
- xwdssid_t *rssid;
- /* Find the index for the last diphone of the previous word +
- * the first phone of the current word. */
- rssid = dict2pid_rssid(ps_search_dict2pid(ngs),
- pbe->last_phone, pbe->last2_phone);
- /* This may be WORST_SCORE, which means that there was no exit
- * with rcphone as right context. */
- return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]];
- }
-}
-
-/*
- * Compute acoustic and LM scores for a BPTable entry (segment).
- */
-void
-ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf,
- int32 *out_ascr, int32 *out_lscr)
-{
- bptbl_t *pbe;
- int32 start_score;
-
- /* Start of utterance. */
- if (be->bp == NO_BP) {
- *out_ascr = be->score;
- *out_lscr = 0;
- return;
- }
-
- /* Otherwise, calculate lscr and ascr. */
- pbe = ngs->bp_table + be->bp;
- start_score = ngram_search_exit_score(ngs, pbe,
- dict_first_phone(ps_search_dict(ngs),be->wid));
- assert(start_score BETTER_THAN WORST_SCORE);
-
- /* FIXME: These result in positive acoustic scores when filler
- words have non-filler pronunciations. That whole business
- is still pretty much broken but at least it doesn't
- segfault. */
- if (be->wid == ps_search_silence_wid(ngs)) {
- *out_lscr = ngs->silpen;
- }
- else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
- *out_lscr = ngs->fillpen;
- }
- else {
- int32 n_used;
- *out_lscr = ngram_tg_score(ngs->lmset,
- be->real_wid,
- pbe->real_wid,
- pbe->prev_real_wid,
- &n_used)>>SENSCR_SHIFT;
- *out_lscr = *out_lscr * lwf;
- }
- *out_ascr = be->score - start_score - *out_lscr;
-}
-
-static int
-ngram_search_start(ps_search_t *search)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- ngs->done = FALSE;
- ngram_model_flush(ngs->lmset);
- if (ngs->fwdtree)
- ngram_fwdtree_start(ngs);
- else if (ngs->fwdflat)
- ngram_fwdflat_start(ngs);
- else
- return -1;
- return 0;
-}
-
-static int
-ngram_search_step(ps_search_t *search, int frame_idx)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- if (ngs->fwdtree)
- return ngram_fwdtree_search(ngs, frame_idx);
- else if (ngs->fwdflat)
- return ngram_fwdflat_search(ngs, frame_idx);
- else
- return -1;
-}
-
-void
-dump_bptable(ngram_search_t *ngs)
-{
- int i;
- E_INFO("Backpointer table (%d entries):\n", ngs->bpidx);
- for (i = 0; i < ngs->bpidx; ++i) {
- bptbl_t *bpe = ngs->bp_table + i;
- int j, rcsize;
-
- E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
- i, dict_wordstr(ps_search_dict(ngs), bpe->wid),
- (bpe->bp == -1
- ? 0 : ngs->bp_table[bpe->bp].frame + 1),
- bpe->frame, bpe->score, bpe->bp,
- bpe->real_wid, bpe->prev_real_wid);
-
- if (bpe->last2_phone == -1)
- rcsize = 0;
- else
- rcsize = dict2pid_rssid(ps_search_dict2pid(ngs),
- bpe->last_phone, bpe->last2_phone)->n_ssid;
- if (rcsize) {
- E_INFOCONT("\tbss");
- for (j = 0; j < rcsize; ++j)
- if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE)
- E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]);
- }
- E_INFOCONT("\n");
- }
-}
-
-static int
-ngram_search_finish(ps_search_t *search)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- ngs->n_tot_frame += ngs->n_frame;
- if (ngs->fwdtree) {
- ngram_fwdtree_finish(ngs);
- /* dump_bptable(ngs); */
-
- /* Now do fwdflat search in its entirety, if requested. */
- if (ngs->fwdflat) {
- int i;
- /* Rewind the acoustic model. */
- if (acmod_rewind(ps_search_acmod(ngs)) < 0)
- return -1;
- /* Now redo search. */
- ngram_fwdflat_start(ngs);
- i = 0;
- while (ps_search_acmod(ngs)->n_feat_frame > 0) {
- int nfr;
- if ((nfr = ngram_fwdflat_search(ngs, i)) < 0)
- return nfr;
- acmod_advance(ps_search_acmod(ngs));
- ++i;
- }
- ngram_fwdflat_finish(ngs);
- /* And now, we should have a result... */
- /* dump_bptable(ngs); */
- }
- }
- else if (ngs->fwdflat) {
- ngram_fwdflat_finish(ngs);
- }
-
- /* Mark the current utterance as done. */
- ngs->done = TRUE;
- return 0;
-}
-
-static ps_latlink_t *
-ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- if (search->last_link == NULL) {
- search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset,
- ngs->bestpath_fwdtree_lw_ratio,
- ngs->ascale);
- if (search->last_link == NULL)
- return NULL;
- /* Also calculate betas so we can fill in the posterior
- * probability field in the segmentation. */
- if (search->post == 0)
- search->post = ps_lattice_posterior(search->dag, ngs->lmset,
- ngs->ascale);
- }
- if (out_score)
- *out_score = search->last_link->path_scr + search->dag->final_node_ascr;
- return search->last_link;
-}
-
-static char const *
-ngram_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- /* Only do bestpath search if the utterance is complete. */
- if (ngs->bestpath && ngs->done) {
- ps_lattice_t *dag;
- ps_latlink_t *link;
- char const *hyp;
- double n_speech;
-
- ptmr_reset(&ngs->bestpath_perf);
- ptmr_start(&ngs->bestpath_perf);
- if ((dag = ngram_search_lattice(search)) == NULL)
- return NULL;
- if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
- return NULL;
- hyp = ps_lattice_hyp(dag, link);
- ptmr_stop(&ngs->bestpath_perf);
- n_speech = (double)dag->n_frames
- / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
- E_INFO("bestpath %.2f CPU %.3f xRT\n",
- ngs->bestpath_perf.t_cpu,
- ngs->bestpath_perf.t_cpu / n_speech);
- E_INFO("bestpath %.2f wall %.3f xRT\n",
- ngs->bestpath_perf.t_elapsed,
- ngs->bestpath_perf.t_elapsed / n_speech);
- return hyp;
- }
- else {
- int32 bpidx;
-
- /* fwdtree and fwdflat use same backpointer table. */
- bpidx = ngram_search_find_exit(ngs, -1, out_score, out_is_final);
- if (bpidx != NO_BP)
- return ngram_search_bp_hyp(ngs, bpidx);
- }
-
- return NULL;
-}
-
-static void
-ngram_search_bp2itor(ps_seg_t *seg, int bp)
-{
- ngram_search_t *ngs = (ngram_search_t *)seg->search;
- bptbl_t *be, *pbe;
-
- be = &ngs->bp_table[bp];
- pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp];
- seg->word = dict_wordstr(ps_search_dict(ngs), be->wid);
- seg->ef = be->frame;
- seg->sf = pbe ? pbe->frame + 1 : 0;
- seg->prob = 0; /* Bogus value... */
- /* Compute acoustic and LM scores for this segment. */
- if (pbe == NULL) {
- seg->ascr = be->score;
- seg->lscr = 0;
- seg->lback = 0;
- }
- else {
- int32 start_score;
-
- /* Find ending path score of previous word. */
- start_score = ngram_search_exit_score(ngs, pbe,
- dict_first_phone(ps_search_dict(ngs), be->wid));
- assert(start_score BETTER_THAN WORST_SCORE);
- if (be->wid == ps_search_silence_wid(ngs)) {
- seg->lscr = ngs->silpen;
- }
- else if (dict_filler_word(ps_search_dict(ngs), be->wid)) {
- seg->lscr = ngs->fillpen;
- }
- else {
- seg->lscr = ngram_tg_score(ngs->lmset,
- be->real_wid,
- pbe->real_wid,
- pbe->prev_real_wid,
- &seg->lback)>>SENSCR_SHIFT;
- seg->lscr = (int32)(seg->lscr * seg->lwf);
- }
- seg->ascr = be->score - start_score - seg->lscr;
- }
-}
-
-static void
-ngram_bp_seg_free(ps_seg_t *seg)
-{
- bptbl_seg_t *itor = (bptbl_seg_t *)seg;
-
- ckd_free(itor->bpidx);
- ckd_free(itor);
-}
-
-static ps_seg_t *
-ngram_bp_seg_next(ps_seg_t *seg)
-{
- bptbl_seg_t *itor = (bptbl_seg_t *)seg;
-
- if (++itor->cur == itor->n_bpidx) {
- ngram_bp_seg_free(seg);
- return NULL;
- }
-
- ngram_search_bp2itor(seg, itor->bpidx[itor->cur]);
- return seg;
-}
-
-static ps_segfuncs_t ngram_bp_segfuncs = {
- /* seg_next */ ngram_bp_seg_next,
- /* seg_free */ ngram_bp_seg_free
-};
-
-static ps_seg_t *
-ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf)
-{
- bptbl_seg_t *itor;
- int bp, cur;
-
- /* Calling this an "iterator" is a bit of a misnomer since we have
- * to get the entire backtrace in order to produce it. On the
- * other hand, all we actually need is the bptbl IDs, and we can
- * allocate a fixed-size array of them. */
- itor = ckd_calloc(1, sizeof(*itor));
- itor->base.vt = &ngram_bp_segfuncs;
- itor->base.search = ps_search_base(ngs);
- itor->base.lwf = lwf;
- itor->n_bpidx = 0;
- bp = bpidx;
- while (bp != NO_BP) {
- bptbl_t *be = &ngs->bp_table[bp];
- bp = be->bp;
- ++itor->n_bpidx;
- }
- if (itor->n_bpidx == 0) {
- ckd_free(itor);
- return NULL;
- }
- itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx));
- cur = itor->n_bpidx - 1;
- bp = bpidx;
- while (bp != NO_BP) {
- bptbl_t *be = &ngs->bp_table[bp];
- itor->bpidx[cur] = bp;
- bp = be->bp;
- --cur;
- }
-
- /* Fill in relevant fields for first element. */
- ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]);
-
- return (ps_seg_t *)itor;
-}
-
-static ps_seg_t *
-ngram_search_seg_iter(ps_search_t *search, int32 *out_score)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- /* Only do bestpath search if the utterance is done. */
- if (ngs->bestpath && ngs->done) {
- ps_lattice_t *dag;
- ps_latlink_t *link;
- double n_speech;
- ps_seg_t *itor;
-
- ptmr_reset(&ngs->bestpath_perf);
- ptmr_start(&ngs->bestpath_perf);
- if ((dag = ngram_search_lattice(search)) == NULL)
- return NULL;
- if ((link = ngram_search_bestpath(search, out_score, TRUE)) == NULL)
- return NULL;
- itor = ps_lattice_seg_iter(dag, link,
- ngs->bestpath_fwdtree_lw_ratio);
- ptmr_stop(&ngs->bestpath_perf);
- n_speech = (double)dag->n_frames
- / cmd_ln_int32_r(ps_search_config(ngs), "-frate");
- E_INFO("bestpath %.2f CPU %.3f xRT\n",
- ngs->bestpath_perf.t_cpu,
- ngs->bestpath_perf.t_cpu / n_speech);
- E_INFO("bestpath %.2f wall %.3f xRT\n",
- ngs->bestpath_perf.t_elapsed,
- ngs->bestpath_perf.t_elapsed / n_speech);
- return itor;
- }
- else {
- int32 bpidx;
-
- /* fwdtree and fwdflat use same backpointer table. */
- bpidx = ngram_search_find_exit(ngs, -1, out_score, NULL);
- return ngram_search_bp_iter(ngs, bpidx,
- /* but different language weights... */
- (ngs->done && ngs->fwdflat)
- ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
- }
-
- return NULL;
-}
-
-static int32
-ngram_search_prob(ps_search_t *search)
-{
- ngram_search_t *ngs = (ngram_search_t *)search;
-
- /* Only do bestpath search if the utterance is done. */
- if (ngs->bestpath && ngs->done) {
- ps_lattice_t *dag;
- ps_latlink_t *link;
-
- if ((dag = ngram_search_lattice(search)) == NULL)
- return 0;
- if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
- return 0;
- return search->post;
- }
- else {
- /* FIXME: Give some kind of good estimate here, eventually. */
- return 0;
- }
-}
-
-static void
-create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag)
-{
- bptbl_t *bp_ptr;
- int32 i;
-
- for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
- int32 sf, ef, wid;
- ps_latnode_t *node;
-
- /* Skip invalid backpointers (these result from -maxwpf pruning) */
- if (!bp_ptr->valid)
- continue;
-
- sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
- ef = bp_ptr->frame;
- wid = bp_ptr->wid;
-
- assert(ef < dag->n_frames);
- /* Skip non-final </s> entries. */
- if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
- continue;
-
- /* Skip if word not in LM */
- if ((!dict_filler_word(ps_search_dict(ngs), wid))
- && (!ngram_model_set_known_wid(ngs->lmset,
- dict_basewid(ps_search_dict(ngs), wid))))
- continue;
-
- /* See if bptbl entry <wid,sf> already in lattice */
- for (node = dag->nodes; node; node = node->next) {
- if ((node->wid == wid) && (node->sf == sf))
- break;
- }
-
- /* For the moment, store bptbl indices in node.{fef,lef} */
- if (node)
- node->lef = i;
- else {
- /* New node; link to head of list */
- node = listelem_malloc(dag->latnode_alloc);
- node->wid = wid;
- node->sf = sf; /* This is a frame index. */
- node->fef = node->lef = i; /* These are backpointer indices (argh) */
- node->reachable = FALSE;
- node->entries = NULL;
- node->exits = NULL;
-
- /* NOTE: This creates the list of nodes in reverse
- * topological order, i.e. a node always precedes its
- * antecedents in this list. */
- node->next = dag->nodes;
- dag->nodes = node;
- ++dag->n_nodes;
- }
- }
-}
-
-static ps_latnode_t *
-find_start_node(ngram_search_t *ngs, ps_lattice_t *dag)
-{
- ps_latnode_t *node;
-
- /* Find start node <s>.0 */
- for (node = dag->nodes; node; node = node->next) {
- if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0))
- break;
- }
- if (!node) {
- /* This is probably impossible. */
- E_ERROR("Couldn't find <s> in first frame\n");
- return NULL;
- }
- return node;
-}
-
-static ps_latnode_t *
-find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf)
-{
- ps_latnode_t *node;
- int32 ef, bestbp, bp, bestscore;
-
- /* Find final node </s>.last_frame; nothing can follow this node */
- for (node = dag->nodes; node; node = node->next) {
- int32 lef = ngs->bp_table[node->lef].frame;
- if ((node->wid == ps_search_finish_wid(ngs))
- && (lef == dag->n_frames - 1))
- break;
- }
- if (node != NULL)
- return node;
-
- /* It is quite likely that no </s> exited in the last frame. So,
- * find the node corresponding to the best exit. */
- /* Find the last frame containing a word exit. */
- for (ef = dag->n_frames - 1;
- ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
- --ef);
- if (ef < 0) {
- E_ERROR("Empty backpointer table: can not build DAG.\n");
- return NULL;
- }
-
- /* Find best word exit in that frame. */
- bestscore = WORST_SCORE;
- bestbp = NO_BP;
- for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
- int32 n_used, l_scr, wid, prev_wid;
- wid = ngs->bp_table[bp].real_wid;
- prev_wid = ngs->bp_table[bp].prev_real_wid;
- /* Always prefer </s>, of which there will only be one per frame. */
- if (wid == ps_search_finish_wid(ngs)) {
- bestbp = bp;
- break;
- }
- l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs),
- wid, prev_wid, &n_used) >>SENSCR_SHIFT;
- l_scr = l_scr * lwf;
- if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) {
- bestscore = ngs->bp_table[bp].score + l_scr;
- bestbp = bp;
- }
- }
- if (bestbp == NO_BP) {
- E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef);
- return NULL;
- }
- E_INFO("</s> not found in last frame, using %s.%d instead\n",
- dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef);
-
- /* Now find the node that corresponds to it. */
- for (node = dag->nodes; node; node = node->next) {
- if (node->lef == bestbp)
- return node;
- }
-
- /* FIXME: This seems to happen a lot! */
- E_ERROR("Failed to find DAG node corresponding to %s\n",
- dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid));
- return NULL;
-}
-
-/*
- * Build lattice from bptable.
- */
-ps_lattice_t *
-ngram_search_lattice(ps_search_t *search)
-{
- int32 i, score, ascr, lscr;
- ps_latnode_t *node, *from, *to;
- ngram_search_t *ngs;
- ps_lattice_t *dag;
- int min_endfr, nlink;
- float lwf;
-
- ngs = (ngram_search_t *)search;
- min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr");
-
- /* If the best score is WORST_SCORE or worse, there is no way to
- * make a lattice. */
- if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE)
- return NULL;
-
- /* Check to see if a lattice has previously been created over the
- * same number of frames, and reuse it if so. */
- if (search->dag && search->dag->n_frames == ngs->n_frame)
- return search->dag;
-
- /* Nope, create a new one. */
- ps_lattice_free(search->dag);
- search->dag = NULL;
- dag = ps_lattice_init_search(search, ngs->n_frame);
- /* Compute these such that they agree with the fwdtree language weight. */
- lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
- create_dag_nodes(ngs, dag);
- if ((dag->start = find_start_node(ngs, dag)) == NULL)
- goto error_out;
- if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
- goto error_out;
- E_INFO("lattice start node %s.%d end node %s.%d\n",
- dict_wordstr(search->dict, dag->start->wid), dag->start->sf,
- dict_wordstr(search->dict, dag->end->wid), dag->end->sf);
-
- ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf,
- &dag->final_node_ascr, &lscr);
-
- /*
- * At this point, dag->nodes is ordered such that nodes earlier in
- * the list can follow (in time) those later in the list, but not
- * vice versa (see above - also note that adjacency is purely
- * determined by time which is why we can make this claim). Now
- * create precedence links and simultanesously mark all nodes that
- * can reach dag->end. (All nodes are reached from dag->start
- * simply by definition - they were created that way).
- *
- * Note that this also means that any nodes before dag->end in the
- * list can be discarded, meaning that dag->end will always be
- * equal to dag->nodes (FIXME: except when loading from a file but
- * we can fix that...)
- */
- i = 0;
- while (dag->nodes && dag->nodes != dag->end) {
- ps_latnode_t *next = dag->nodes->next;
- listelem_free(dag->latnode_alloc, dag->nodes);
- dag->nodes = next;
- ++i;
- }
- E_INFO("Eliminated %d nodes before end node\n", i);
- dag->end->reachable = TRUE;
- nlink = 0;
- for (to = dag->end; to; to = to->next) {
- int fef, lef;
-
- /* Skip if not reachable; it will never be reachable from dag->end */
- if (!to->reachable)
- continue;
-
- /* Prune nodes with too few endpoints - heuristic
- borrowed from Sphinx3 */
- fef = ngs->bp_table[to->fef].frame;
- lef = ngs->bp_table[to->lef].frame;
- if (to != dag->end && lef - fef < min_endfr) {
- to->reachable = FALSE;
- continue;
- }
-
- /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */
- for (from = to->next; from; from = from->next) {
- bptbl_t *from_bpe;
-
- fef = ngs->bp_table[from->fef].frame;
- lef = ngs->bp_table[from->lef].frame;
-
- if ((to->sf <= fef) || (to->sf > lef + 1))
- continue;
- if (lef - fef < min_endfr) {
- assert(!from->reachable);
- continue;
- }
-
- /* Find bptable entry for "from" that exactly precedes "to" */
- i = from->fef;
- from_bpe = ngs->bp_table + i;
- for (; i <= from->lef; i++, from_bpe++) {
- if (from_bpe->wid != from->wid)
- continue;
- if (from_bpe->frame >= to->sf - 1)
- break;
- }
-
- if ((i > from->lef) || (from_bpe->frame != to->sf - 1))
- continue;
-
- /* Find acoustic score from.sf->to.sf-1 with right context = to */
- /* This gives us from_bpe's best acoustic score. */
- ngram_compute_seg_score(ngs, from_bpe, lwf,
- &ascr, &lscr);
- /* Now find the exact path score for from->to, including
- * the appropriate final triphone. In fact this might not
- * exist. */
- score = ngram_search_exit_score(ngs, from_bpe,
- dict_first_phone(ps_search_dict(ngs), to->wid));
- /* Does not exist. Can't create a link here. */
- if (score == WORST_SCORE)
- continue;
- /* Adjust the arc score to match the correct triphone. */
- else
- score = ascr + (score - from_bpe->score);
- if (score BETTER_THAN 0) {
- /* Scores must be negative, or Bad Things will happen.
- In general, they are, except in corner cases
- involving filler words. We don't want to throw any
- links away so we'll keep these, but with some
- arbitrarily improbable but recognizable score. */
- ps_lattice_link(dag, from, to, -424242, from_bpe->frame);
- ++nlink;
- from->reachable = TRUE;
- }
- else if (score BETTER_THAN WORST_SCORE) {
- ps_lattice_link(dag, from, to, score, from_bpe->frame);
- ++nlink;
- from->reachable = TRUE;
- }
- }
- }
-
- /* There must be at least one path between dag->start and dag->end */
- if (!dag->start->reachable) {
- E_ERROR("End node of lattice isolated; unreachable\n");
- goto error_out;
- }
-
- for (node = dag->nodes; node; node = node->next) {
- /* Change node->{fef,lef} from bptbl indices to frames. */
- node->fef = ngs->bp_table[node->fef].frame;
- node->lef = ngs->bp_table[node->lef].frame;
- /* Find base wid for nodes. */
- node->basewid = dict_basewid(search->dict, node->wid);
- }
-
- /* Link nodes with alternate pronunciations at the same timepoint. */
- for (node = dag->nodes; node; node = node->next) {
- ps_latnode_t *alt;
- /* Scan forward to find the next alternate, then stop. */
- for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) {
- if (alt->basewid == node->basewid) {
- alt->alt = node->alt;
- node->alt = alt;
- break;
- }
- }
- }
- E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink);
-
- /* Minor hack: If the final node is a filler word and not </s>,
- * then set its base word ID to </s>, so that the language model
- * scores won't be screwed up. */
- if (dict_filler_word(ps_search_dict(ngs), dag->end->wid))
- dag->end->basewid = ps_search_finish_wid(ngs);
-
- /* Free nodes unreachable from dag->end and their links */
- ps_lattice_delete_unreachable(dag);
-
- /* Add silprob and fillprob to corresponding links */
- ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen);
-
- search->dag = dag;
- return dag;
-
-error_out:
- ps_lattice_free(dag);
- return NULL;
-}
-
-void ngram_search_set_lm(ngram_model_t *lm)
-{
- default_lm = ngram_model_retain(lm);
-}
-