57 #include <sphinxbase/err.h>
58 #include <sphinxbase/ckd_alloc.h>
59 #include <sphinxbase/strfuncs.h>
60 #include <sphinxbase/cmd_ln.h>
65 #include "fsg_search_internal.h"
66 #include "fsg_history.h"
67 #include "fsg_lextree.h"
71 #define __FSG_DBG_CHAN__ 0
90 fsg_search_add_silences(
fsg_search_t *fsgs, fsg_model_t *fsg)
96 dict = ps_search_dict(fsgs);
110 fsg_model_add_silence(fsg,
"<sil>", -1,
111 cmd_ln_float32_r(ps_search_config(fsgs),
"-silprob"));
114 for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) {
115 char const *word = dict_wordstr(dict, wid);
116 if (wid == dict_startwid(dict) || wid == dict_finishwid(dict))
118 fsg_model_add_silence(fsg, word, -1,
119 cmd_ln_float32_r(ps_search_config(fsgs),
"-fillprob"));
128 fsg_search_check_dict(
fsg_search_t *fsgs, fsg_model_t *fsg)
133 dict = ps_search_dict(fsgs);
134 for (i = 0; i < fsg_model_n_word(fsg); ++i) {
138 word = fsg_model_word_str(fsg, i);
141 E_ERROR(
"The word '%s' is missing in the dictionary\n", word);
150 fsg_search_add_altpron(
fsg_search_t *fsgs, fsg_model_t *fsg)
156 dict = ps_search_dict(fsgs);
159 n_word = fsg_model_n_word(fsg);
160 for (i = 0; i < n_word; ++i) {
164 word = fsg_model_word_str(fsg, i);
167 while ((wid = dict_nextalt(dict, wid)) !=
BAD_S3WID) {
168 n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid));
173 E_INFO(
"Added %d alternate word transitions\n", n_alt);
178 fsg_search_init(
const char *name,
186 ps_search_init(ps_search_base(fsgs), &fsg_funcs, PS_SEARCH_TYPE_FSG, name, config, acmod, dict, d2p);
188 fsgs->
fsg = fsg_model_retain(fsg);
192 if (fsgs->
hmmctx == NULL) {
193 ps_search_free(ps_search_base(fsgs));
198 fsgs->
history = fsg_history_init(NULL, dict);
204 = (int32) logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))
207 = (int32) logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))
210 = (int32) logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))
214 fsgs->lw = cmd_ln_float32_r(config,
"-lw");
215 fsgs->pip = (int32) (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip"))
218 fsgs->
wip = (int32) (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip"))
223 fsgs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
225 E_INFO(
"FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n",
227 fsgs->
wip, fsgs->pip);
229 if (!fsg_search_check_dict(fsgs, fsg)) {
230 fsg_search_free(ps_search_base(fsgs));
234 if (cmd_ln_boolean_r(config,
"-fsgusefiller") &&
235 !fsg_model_has_sil(fsg))
236 fsg_search_add_silences(fsgs, fsg);
238 if (cmd_ln_boolean_r(config,
"-fsgusealtpron") &&
239 !fsg_model_has_alt(fsg))
240 fsg_search_add_altpron(fsgs, fsg);
242 if (fsg_search_reinit(ps_search_base(fsgs),
243 ps_search_dict(fsgs),
244 ps_search_dict2pid(fsgs)) < 0)
246 ps_search_free(ps_search_base(fsgs));
250 ptmr_init(&fsgs->
perf);
252 return ps_search_base(fsgs);
260 double n_speech = (double)fsgs->n_tot_frame
261 / cmd_ln_int32_r(ps_search_config(fsgs),
"-frate");
263 E_INFO(
"TOTAL fsg %.2f CPU %.3f xRT\n",
264 fsgs->
perf.t_tot_cpu,
265 fsgs->
perf.t_tot_cpu / n_speech);
266 E_INFO(
"TOTAL fsg %.2f wall %.3f xRT\n",
267 fsgs->
perf.t_tot_elapsed,
268 fsgs->
perf.t_tot_elapsed / n_speech);
273 fsg_history_reset(fsgs->
history);
274 fsg_history_set_fsg(fsgs->
history, NULL, NULL);
275 fsg_history_free(fsgs->
history);
278 fsg_model_free(fsgs->
fsg);
299 ps_search_acmod(fsgs)->mdef,
303 fsg_history_set_fsg(fsgs->
history, fsgs->
fsg, dict);
318 for (gn = fsgs->
pnode_active; gn; gn = gnode_next(gn)) {
320 hmm = fsg_pnode_hmmptr(pnode);
321 assert(hmm_frame(hmm) == fsgs->
frame);
343 E_ERROR(
"Frame %d: No active HMM!!\n", fsgs->
frame);
347 for (n = 0, gn = fsgs->
pnode_active; gn; gn = gnode_next(gn), n++) {
351 hmm = fsg_pnode_hmmptr(pnode);
352 assert(hmm_frame(hmm) == fsgs->
frame);
355 E_INFO(
"pnode(%08x) active @frm %5d\n", (int32) pnode,
361 E_INFO(
"pnode(%08x) after eval @frm %5d\n",
362 (int32) pnode, fsgs->
frame);
371 E_INFO(
"[%5d] %6d HMM; bestscr: %11d\n", fsgs->
frame, n, bestscore);
376 maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs),
"-maxhmmpf");
377 if (maxhmmpf != -1 && n > maxhmmpf) {
399 if (n > fsg_lextree_n_pnode(fsgs->
lextree))
400 E_FATAL(
"PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n",
412 int32 newscore, thresh, nf;
415 assert(!fsg_pnode_leaf(pnode));
417 nf = fsgs->
frame + 1;
420 hmm = fsg_pnode_hmmptr(pnode);
422 for (child = fsg_pnode_succ(pnode);
423 child; child = fsg_pnode_sibling(child)) {
424 newscore = hmm_out_score(hmm) + child->logs2prob;
427 && (newscore
BETTER_THAN hmm_in_score(&child->hmm))) {
429 if (hmm_frame(&child->hmm) < nf) {
436 hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf);
451 assert(fsg_pnode_leaf(pnode));
453 hmm = fsg_pnode_hmmptr(pnode);
454 fl = fsg_pnode_fsglink(pnode);
457 wid = fsg_link_wid(fl);
461 E_INFO(
"[%5d] Exit(%08x) %10d(score) %5d(pred)\n",
462 fsgs->
frame, (int32) pnode,
463 hmm_out_score(hmm), hmm_out_history(hmm));
470 if (fsg_model_is_filler(fsgs->
fsg, wid)
472 || (dict_is_single_phone(ps_search_dict(fsgs),
474 fsg_model_word_str(fsgs->
fsg, wid))))) {
479 fsg_history_entry_add(fsgs->
history,
483 hmm_out_history(hmm),
484 pnode->ci_ext, ctxt);
489 fsg_history_entry_add(fsgs->
history,
493 hmm_out_history(hmm),
494 pnode->ci_ext, pnode->ctxt);
511 int32 thresh, word_thresh, phone_thresh;
516 phone_thresh = fsgs->
bestscore + fsgs->pbeam;
519 for (gn = fsgs->
pnode_active; gn; gn = gnode_next(gn)) {
521 hmm = fsg_pnode_hmmptr(pnode);
523 if (hmm_bestscore(hmm) >= thresh) {
525 if (hmm_frame(hmm) == fsgs->
frame) {
526 hmm_frame(hmm) = fsgs->
frame + 1;
532 assert(hmm_frame(hmm) == fsgs->
frame + 1);
535 if (!fsg_pnode_leaf(pnode)) {
536 if (hmm_out_score(hmm) >= phone_thresh) {
538 fsg_search_pnode_trans(fsgs, pnode);
542 if (hmm_out_score(hmm) >= word_thresh) {
544 fsg_search_pnode_exit(fsgs, pnode);
558 int32 bpidx, n_entries, thresh, newscore;
567 n_entries = fsg_history_n_entries(fsgs->
history);
569 for (bpidx = fsgs->
bpidx_start; bpidx < n_entries; bpidx++) {
571 hist_entry = fsg_history_entry_get(fsgs->
history, bpidx);
573 l = fsg_hist_entry_fsglink(hist_entry);
576 s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg);
584 for (itor = fsg_model_arcs(fsg, s); itor;
585 itor = fsg_arciter_next(itor)) {
586 fsg_link_t *l = fsg_arciter_get(itor);
589 if (fsg_link_wid(l) != -1)
592 fsg_hist_entry_score(hist_entry) +
595 if (newscore >= thresh) {
596 fsg_history_entry_add(fsgs->
history, l,
597 fsg_hist_entry_frame(hist_entry),
600 fsg_hist_entry_lc(hist_entry),
601 fsg_hist_entry_rc(hist_entry));
615 int32 bpidx, n_entries;
618 int32 score, newscore, thresh, nf, d;
622 n_entries = fsg_history_n_entries(fsgs->
history);
625 nf = fsgs->
frame + 1;
627 for (bpidx = fsgs->
bpidx_start; bpidx < n_entries; bpidx++) {
628 hist_entry = fsg_history_entry_get(fsgs->
history, bpidx);
630 score = fsg_hist_entry_score(hist_entry);
631 assert(fsgs->
frame == fsg_hist_entry_frame(hist_entry));
633 l = fsg_hist_entry_fsglink(hist_entry);
636 d = l ? fsg_link_to_state(l) : fsg_model_start_state(fsgs->
639 lc = fsg_hist_entry_lc(hist_entry);
642 for (root = fsg_lextree_root(fsgs->
lextree, d);
643 root; root = root->sibling) {
646 if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) &&
647 (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) {
655 newscore = score + root->logs2prob;
658 && (newscore
BETTER_THAN hmm_in_score(&root->hmm))) {
659 if (hmm_frame(&root->hmm) < nf) {
666 (
"[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n",
667 fsgs->
frame, bpidx, (int32) root);
673 (
"[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n",
674 fsgs->
frame, bpidx, (int32) root);
678 hmm_enter(&root->hmm, newscore, bpidx, nf);
687 fsg_search_step(
ps_search_t *search,
int frame_idx)
698 fsg_search_sen_active(fsgs);
708 fsg_search_hmm_eval(fsgs);
715 fsg_search_hmm_prune_prop(fsgs);
716 fsg_history_end_frame(fsgs->
history);
722 fsg_search_null_prop(fsgs);
723 fsg_history_end_frame(fsgs->
history);
729 fsg_search_word_trans(fsgs);
737 for (gn = fsgs->
pnode_active; gn; gn = gnode_next(gn)) {
739 hmm = fsg_pnode_hmmptr(pnode);
741 if (hmm_frame(hmm) == fsgs->
frame) {
746 assert(hmm_frame(hmm) == (fsgs->
frame + 1));
788 fsg_history_reset(fsgs->
history);
789 fsg_history_utt_start(fsgs->
history);
798 fsg_history_entry_add(fsgs->
history,
799 NULL, -1, 0, -1, silcipid, ctxt);
803 fsg_search_null_prop(fsgs);
806 fsg_search_word_trans(fsgs);
817 ptmr_reset(&fsgs->
perf);
818 ptmr_start(&fsgs->
perf);
835 for (gn = fsgs->
pnode_active; gn; gn = gnode_next(gn)) {
851 n_hist = fsg_history_n_entries(fsgs->
history);
852 fsgs->n_tot_frame += fsgs->
frame;
854 (
"%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n",
859 n_hist, (fsgs->
frame > 0) ? n_hist / fsgs->
frame : 0);
862 ptmr_stop(&fsgs->
perf);
864 cf = ps_search_acmod(fsgs)->output_frame;
866 double n_speech = (double) (cf + 1)
867 / cmd_ln_int32_r(ps_search_config(fsgs),
"-frate");
868 E_INFO(
"fsg %.2f CPU %.3f xRT\n",
869 fsgs->
perf.t_cpu, fsgs->
perf.t_cpu / n_speech);
870 E_INFO(
"fsg %.2f wall %.3f xRT\n",
871 fsgs->
perf.t_elapsed, fsgs->
perf.t_elapsed / n_speech);
879 fsg_search_find_exit(
fsg_search_t *fsgs,
int frame_idx,
int final, int32 *out_score)
883 int bpidx, frm, last_frm, besthist;
887 frame_idx = fsgs->
frame - 1;
888 last_frm = frm = frame_idx;
891 bpidx = fsg_history_n_entries(fsgs->
history) - 1;
893 hist_entry = fsg_history_entry_get(fsgs->
history, bpidx);
894 if (fsg_hist_entry_frame(hist_entry) <= frame_idx) {
895 frm = last_frm = fsg_hist_entry_frame(hist_entry);
909 while (frm == last_frm) {
913 fl = fsg_hist_entry_fsglink(hist_entry);
914 score = fsg_hist_entry_score(hist_entry);
920 if (score == bestscore && fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
925 || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) {
934 hist_entry = fsg_history_entry_get(fsgs->
history, bpidx);
935 frm = fsg_hist_entry_frame(hist_entry);
939 if (besthist == -1) {
940 E_ERROR(
"Final result does not match the grammar in frame %d\n", frame_idx);
946 *out_score = bestscore;
953 fsg_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
964 if (search->
post == 0)
973 fsg_search_hyp(
ps_search_t *search, int32 *out_score)
976 dict_t *dict = ps_search_dict(search);
982 bpidx = fsg_search_find_exit(fsgs, fsgs->
frame, fsgs->
final, out_score);
993 if ((dag = fsg_search_lattice(search)) == NULL) {
994 E_WARN(
"Failed to obtain the lattice while bestpath enabled\n");
997 if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) {
998 E_WARN(
"Failed to find the bestpath in a lattice\n");
1008 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1009 char const *baseword;
1012 bp = fsg_hist_entry_pred(hist_entry);
1013 wid = fsg_link_wid(fl);
1014 if (wid < 0 || fsg_model_is_filler(fsgs->
fsg, wid))
1016 baseword = dict_basestr(dict,
1018 fsg_model_word_str(fsgs->
fsg, wid)));
1019 len += strlen(baseword) + 1;
1027 search->
hyp_str = ckd_calloc(1, len);
1030 c = search->
hyp_str + len - 1;
1033 fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry);
1034 char const *baseword;
1037 bp = fsg_hist_entry_pred(hist_entry);
1038 wid = fsg_link_wid(fl);
1039 if (wid < 0 || fsg_model_is_filler(fsgs->
fsg, wid))
1041 baseword = dict_basestr(dict,
1043 fsg_model_word_str(fsgs->
fsg, wid)));
1044 len = strlen(baseword);
1046 memcpy(c, baseword, len);
1063 if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0)
1064 ph = fsg_history_entry_get(fsgs->
history, bp);
1065 seg->
word = fsg_model_word_str(fsgs->
fsg, hist_entry->fsglink->wid);
1066 seg->
ef = fsg_hist_entry_frame(hist_entry);
1067 seg->
sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0;
1069 if (seg->
sf > seg->
ef) seg->
sf = seg->
ef;
1076 seg->
ascr = hist_entry->score - ph->score - seg->
lscr;
1079 seg->
ascr = hist_entry->score - seg->
lscr;
1087 ckd_free(itor->
hist);
1101 fsg_seg_bp2itor(seg, itor->
hist[itor->
cur]);
1118 bpidx = fsg_search_find_exit(fsgs, fsgs->
frame, fsgs->
final, &out_score);
1128 if ((dag = fsg_search_lattice(search)) == NULL)
1130 if ((link = fsg_search_bestpath(search, &out_score, TRUE)) == NULL)
1139 itor = ckd_calloc(1,
sizeof(*itor));
1140 itor->
base.
vt = &fsg_segfuncs;
1147 bp = fsg_hist_entry_pred(hist_entry);
1159 itor->
hist[cur] = hist_entry;
1160 bp = fsg_hist_entry_pred(hist_entry);
1165 fsg_seg_bp2itor((
ps_seg_t *)itor, itor->hist[0]);
1180 if ((dag = fsg_search_lattice(search)) == NULL)
1182 if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL)
1184 return search->
post;
1193 find_node(
ps_lattice_t *dag, fsg_model_t *fsg,
int sf, int32 wid, int32 node_id)
1197 for (node = dag->
nodes; node; node = node->
next)
1198 if ((node->
sf == sf) && (node->
wid == wid) && (node->
node_id == node_id))
1204 new_node(
ps_lattice_t *dag, fsg_model_t *fsg,
int sf,
int ef, int32 wid, int32 node_id, int32 ascr)
1208 node = find_node(dag, fsg, sf, wid, node_id);
1212 if (node->
lef == -1 || node->
lef < ef)
1214 if (node->
fef == -1 || node->
fef > ef)
1225 node->
fef = node->
lef = ef;
1244 glist_t start = NULL;
1248 for (node = dag->
nodes; node; node = node->
next) {
1249 if (node->
sf == 0 && node->
exits) {
1250 E_INFO(
"Start node %s.%d:%d:%d\n",
1251 fsg_model_word_str(fsgs->
fsg, node->
wid),
1253 start = glist_add_ptr(start, node);
1262 node = gnode_ptr(start);
1268 wid = fsg_model_word_add(fsgs->
fsg,
"<s>");
1269 if (fsgs->
fsg->silwords)
1270 bitvec_set(fsgs->
fsg->silwords, wid);
1271 node = new_node(dag, fsgs->
fsg, 0, 0, wid, -1, 0);
1272 for (st = start; st; st = gnode_next(st))
1287 for (node = dag->
nodes; node; node = node->
next) {
1289 E_INFO(
"End node %s.%d:%d:%d (%d)\n",
1290 fsg_model_word_str(fsgs->
fsg, node->
wid),
1292 end = glist_add_ptr(end, node);
1298 node = gnode_ptr(end);
1300 else if (nend == 0) {
1306 for (node = dag->
nodes; node; node = node->
next) {
1314 E_INFO(
"End node %s.%d:%d:%d (%d)\n",
1315 fsg_model_word_str(fsgs->
fsg, node->
wid),
1324 wid = fsg_model_word_add(fsgs->
fsg,
"</s>");
1325 if (fsgs->
fsg->silwords)
1326 bitvec_set(fsgs->
fsg->silwords, wid);
1327 node = new_node(dag, fsgs->
fsg, fsgs->
frame, fsgs->
frame, wid, -1, 0);
1330 for (st = end; st; st = gnode_next(st)) {
1346 q = glist_add_ptr(NULL, end);
1352 q = gnode_free(q, NULL);
1354 for (x = node->
entries; x; x = x->next) {
1358 q = glist_add_ptr(q, next);
1400 n = fsg_history_n_entries(fsgs->
history);
1401 for (i = 0; i < n; ++i) {
1407 if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1420 ascr = fh->score - pfh->score;
1421 sf = pfh->frame + 1;
1434 new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, fsg_link_to_state(fh->fsglink), ascr);
1440 n = fsg_history_n_entries(fsgs->
history);
1441 for (i = 0; i < n; ++i) {
1443 fsg_arciter_t *itor;
1449 if (fh->fsglink == NULL || fh->fsglink->wid == -1)
1455 sf = pfh->frame + 1;
1456 ascr = fh->score - pfh->score;
1462 src = find_node(dag, fsg, sf, fh->fsglink->wid, fsg_link_to_state(fh->fsglink));
1465 for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink));
1466 itor; itor = fsg_arciter_next(itor)) {
1467 fsg_link_t *link = fsg_arciter_get(itor);
1470 if (link->wid >= 0) {
1475 if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL)
1483 fsg_arciter_t *itor2;
1486 for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link));
1487 itor2; itor2 = fsg_arciter_next(itor2)) {
1488 fsg_link_t *link = fsg_arciter_get(itor2);
1490 if (link->wid == -1)
1493 if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) {
1503 if ((dag->
start = find_start_node(fsgs, dag)) == NULL) {
1504 E_WARN(
"Failed to find the start node\n");
1507 if ((dag->
end = find_end_node(fsgs, dag)) == NULL) {
1508 E_WARN(
"Failed to find the end node\n");
1513 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1515 fsg_model_word_str(fsg, dag->
end->
wid), dag->
end->
sf);
1521 for (node = dag->
nodes; node; node = node->
next) {
1523 fsg_model_word_str(fsg, node->
wid));
1533 mark_reachable(dag, dag->
end);
1537 int32 silpen, fillpen;
1539 silpen = (int32)(logmath_log(fsg->lmath,
1540 cmd_ln_float32_r(ps_search_config(fsgs),
"-silprob"))
1543 fillpen = (int32)(logmath_log(fsg->lmath,
1544 cmd_ln_float32_r(ps_search_config(fsgs),
"-fillprob"))