11// latbin/lattice-compose.cc
22
33// Copyright 2009-2011 Microsoft Corporation; Saarland University
4+ // 2022 Brno University of Technology
45
56// See ../../COPYING for clarification regarding multiple authors
67//
1718// See the Apache 2 License for the specific language governing permissions and
1819// limitations under the License.
1920
20-
2121#include " base/kaldi-common.h"
2222#include " util/common-utils.h"
2323#include " fstext/fstext-lib.h"
@@ -34,27 +34,37 @@ int main(int argc, char *argv[]) {
3434 using fst::StdArc;
3535
3636 const char *usage =
37- " Composes lattices (in transducer form, as type Lattice). Depending\n "
38- " on the command-line arguments, either composes lattices with lattices,\n "
39- " or lattices with FSTs (rspecifiers are assumed to be lattices, and\n "
40- " rxfilenames are assumed to be FSTs, which have their weights interpreted\n "
41- " as \" graph weights\" when converted into the Lattice format.\n "
37+ " Composes lattices (in transducer form, as type Lattice).\n "
38+ " Depending on the command-line arguments, either composes\n "
39+ " lattices with lattices, or lattices with a single FST or\n "
40+ " multiple FSTs (whose weights are interpreted as \" graph weights\" ).\n "
4241 " \n "
43- " Usage: lattice-compose [options] lattice-rspecifier1 "
44- " ( lattice-rspecifier2|fst-rxfilename2) lattice-wspecifier\n "
45- " e.g.: lattice-compose ark:1.lats ark:2.lats ark:composed.lats \n "
46- " or: lattice-compose ark:1.lats G.fst ark:composed.lats \n " ;
42+ " Usage: lattice-compose [options] < lattice-rspecifier1> "
43+ " < lattice-rspecifier2|fst-rxfilename2|fst-rspecifier2> < lattice-wspecifier> \n "
44+ " If the 2nd arg is an rspecifier, it is interpreted by default as a table of \n "
45+ " lattices, or as a table of FSTs if you specify --compose-with-fst=true. \n " ;
4746
4847 ParseOptions po (usage);
4948
5049 bool write_compact = true ;
5150 int32 num_states_cache = 50000 ;
5251 int32 phi_label = fst::kNoLabel ; // == -1
52+ int32 rho_label = fst::kNoLabel ; // == -1
53+ std::string compose_with_fst = " auto" ;
54+
5355 po.Register (" write-compact" , &write_compact, " If true, write in normal (compact) form." );
5456 po.Register (" phi-label" , &phi_label, " If >0, the label on backoff arcs of the LM" );
57+ po.Register (" rho-label" , &rho_label,
58+ " If >0, the label to forward lat1 paths not present in biasing graph fst2 "
59+ " (rho is input and output symbol on special arc in biasing graph fst2;"
60+ " rho is like phi (matches rest), but rho label is rewritten to the"
61+ " specific symbol from lat1)" );
5562 po.Register (" num-states-cache" , &num_states_cache,
5663 " Number of states we cache when mapping LM FST to lattice type. "
5764 " More -> more memory but faster." );
65+ po.Register (" compose-with-fst" , &compose_with_fst,
66+ " (true|false|auto) For auto arg2 is: rspecifier=lats, rxfilename=fst "
67+ " (old behavior), for true/false rspecifier is fst/lattice." );
5868 po.Read (argc, argv);
5969
6070 if (po.NumArgs () != 3 ) {
@@ -63,14 +73,28 @@ int main(int argc, char *argv[]) {
6373 }
6474
6575 KALDI_ASSERT (phi_label > 0 || phi_label == fst::kNoLabel ); // e.g. 0 not allowed.
76+ KALDI_ASSERT (rho_label > 0 || rho_label == fst::kNoLabel ); // e.g. 0 not allowed.
77+ if (phi_label > 0 && rho_label > 0 ) {
78+ KALDI_ERR << " You cannot set both 'phi_label' and 'rho_label' at the same time." ;
79+ }
80+
81+ { // convert 'compose_with_fst' to lowercase to support: true, True, TRUE
82+ std::string& str (compose_with_fst);
83+ std::transform (str.begin (), str.end (), str.begin (), (int (*)(int ))std::tolower); // lc
84+ }
85+ if (compose_with_fst != " auto" && compose_with_fst != " true" &&
86+ compose_with_fst != " false" ) {
87+ KALDI_ERR << " Unkown 'compose_with_fst' value : " << compose_with_fst
88+ << " , values are (auto|true|false)" ;
89+ }
6690
6791 std::string lats_rspecifier1 = po.GetArg (1 ),
6892 arg2 = po.GetArg (2 ),
6993 lats_wspecifier = po.GetArg (3 );
7094 int32 n_done = 0 , n_fail = 0 ;
7195
7296 SequentialLatticeReader lattice_reader1 (lats_rspecifier1);
73-
97+
7498 CompactLatticeWriter compact_lattice_writer;
7599 LatticeWriter lattice_writer;
76100
@@ -79,33 +103,48 @@ int main(int argc, char *argv[]) {
79103 else
80104 lattice_writer.Open (lats_wspecifier);
81105
82- if (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier ) {
106+ bool arg2_is_rxfilename = (ClassifyRspecifier (arg2, NULL , NULL ) == kNoRspecifier );
107+
108+ if (arg2_is_rxfilename && (compose_with_fst == " auto" || compose_with_fst == " true" )) {
109+ /* *
110+ * arg2 is rxfilename that contains a single fst
111+ * - compose arg1 lattices with single fst in arg2
112+ */
83113 std::string fst_rxfilename = arg2;
84- VectorFst<StdArc> *fst2 = fst::ReadFstKaldi (fst_rxfilename);
85- // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
86- // with all the cost on the first member of the pair (since we're
87- // assuming it's a graph weight).
114+ VectorFst<StdArc>* fst2 = fst::ReadFstKaldi (fst_rxfilename);
115+
116+ // Make sure fst2 is sorted on ilabel
88117 if (fst2->Properties (fst::kILabelSorted , true ) == 0 ) {
89- // Make sure fst2 is sorted on ilabel.
90118 fst::ILabelCompare<StdArc> ilabel_comp;
91119 ArcSort (fst2, ilabel_comp);
92120 }
121+
93122 if (phi_label > 0 )
94123 PropagateFinal (phi_label, fst2);
95124
125+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
126+ // with all the cost on the first member of the pair (since we're
127+ // assuming it's a graph weight).
96128 fst::CacheOptions cache_opts (true , num_states_cache);
97129 fst::MapFstOptions mapfst_opts (cache_opts);
98130 fst::StdToLatticeMapper<BaseFloat> mapper;
99131 fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
100132 mapped_fst2 (*fst2, mapper, mapfst_opts);
133+
101134 for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
102135 std::string key = lattice_reader1.Key ();
103136 KALDI_VLOG (1 ) << " Processing lattice for key " << key;
104137 Lattice lat1 = lattice_reader1.Value ();
105138 ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
139+
106140 Lattice composed_lat;
107- if (phi_label > 0 ) PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
108- else Compose (lat1, mapped_fst2, &composed_lat);
141+ if (phi_label > 0 ) {
142+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
143+ } else if (rho_label > 0 ) {
144+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
145+ } else {
146+ Compose (lat1, mapped_fst2, &composed_lat);
147+ }
109148 if (composed_lat.Start () == fst::kNoStateId ) {
110149 KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
111150 n_fail++;
@@ -121,13 +160,27 @@ int main(int argc, char *argv[]) {
121160 }
122161 }
123162 delete fst2;
124- } else {
163+
164+ } else if (arg2_is_rxfilename && compose_with_fst == " false" ) {
165+ /* *
166+ * arg2 is rxfilename that contains a single lattice
167+ * - would it make sense to do this? Not implementing...
168+ */
169+ KALDI_ERR << " Unimplemented..." ;
170+
171+ } else if (!arg2_is_rxfilename &&
172+ (compose_with_fst == " auto" || compose_with_fst == " false" )) {
173+ /* *
174+ * arg2 is rspecifier that contains a table of lattices
175+ * - composing arg1 lattices with arg2 lattices
176+ */
125177 std::string lats_rspecifier2 = arg2;
126178 // This is the case similar to lattice-interp.cc, where we
127179 // read in another set of lattices and compose them. But in this
128180 // case we don't do any projection; we assume that the user has already
129181 // done this (e.g. with lattice-project).
130182 RandomAccessLatticeReader lattice_reader2 (lats_rspecifier2);
183+
131184 for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
132185 std::string key = lattice_reader1.Key ();
133186 KALDI_VLOG (1 ) << " Processing lattice for key " << key;
@@ -139,6 +192,7 @@ int main(int argc, char *argv[]) {
139192 n_fail++;
140193 continue ;
141194 }
195+
142196 Lattice lat2 = lattice_reader2.Value (key);
143197 // Make sure that either lat2 is ilabel sorted
144198 // or lat1 is olabel sorted, to ensure that
@@ -150,27 +204,103 @@ int main(int argc, char *argv[]) {
150204 fst::ArcSort (&lat2, ilabel_comp);
151205 }
152206
153- Lattice lat_out;
207+ Lattice composed_lat;
208+ // Btw, can the lat2 lattice contain phi/rho symbols?
154209 if (phi_label > 0 ) {
155210 PropagateFinal (phi_label, &lat2);
156- PhiCompose (lat1, lat2, phi_label, &lat_out);
211+ PhiCompose (lat1, lat2, phi_label, &composed_lat);
212+ } else if (rho_label > 0 ) {
213+ RhoCompose (lat1, lat2, rho_label, &composed_lat);
214+ } else {
215+ Compose (lat1, lat2, &composed_lat);
216+ }
217+ if (composed_lat.Start () == fst::kNoStateId ) {
218+ KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
219+ n_fail++;
220+ } else {
221+ if (write_compact) {
222+ CompactLattice clat;
223+ ConvertLattice (composed_lat, &clat);
224+ compact_lattice_writer.Write (key, clat);
225+ } else {
226+ lattice_writer.Write (key, composed_lat);
227+ }
228+ n_done++;
229+ }
230+ }
231+
232+ } else if (!arg2_is_rxfilename && compose_with_fst == " true" ) {
233+ /* *
234+ * arg2 is rspecifier that contains a table of fsts
235+ * - composing arg1 lattices with arg2 fsts
236+ */
237+ std::string fst_rspecifier2 = arg2;
238+ RandomAccessTableReader<fst::VectorFstHolder> fst_reader2 (fst_rspecifier2);
239+
240+ for (; !lattice_reader1.Done (); lattice_reader1.Next ()) {
241+ std::string key = lattice_reader1.Key ();
242+ KALDI_VLOG (1 ) << " Processing lattice for key " << key;
243+ Lattice lat1 = lattice_reader1.Value ();
244+ lattice_reader1.FreeCurrent ();
245+
246+ if (!fst_reader2.HasKey (key)) {
247+ KALDI_WARN << " Not producing output for utterance " << key
248+ << " because not present in second table." ;
249+ n_fail++;
250+ continue ;
251+ }
252+
253+ VectorFst<StdArc> fst2 = fst_reader2.Value (key);
254+ // Make sure fst2 is sorted on ilabel
255+ if (fst2.Properties (fst::kILabelSorted , true ) == 0 ) {
256+ fst::ILabelCompare<StdArc> ilabel_comp;
257+ fst::ArcSort (&fst2, ilabel_comp);
258+ }
259+
260+ // for composing with LM-fsts, it makes all fst2 states final
261+ if (phi_label > 0 )
262+ PropagateFinal (phi_label, &fst2);
263+
264+ // mapped_fst2 is fst2 interpreted using the LatticeWeight semiring,
265+ // with all the cost on the first member of the pair (since we're
266+ // assuming it's a graph weight).
267+ fst::CacheOptions cache_opts (true , num_states_cache);
268+ fst::MapFstOptions mapfst_opts (cache_opts);
269+ fst::StdToLatticeMapper<BaseFloat> mapper;
270+ fst::MapFst<StdArc, LatticeArc, fst::StdToLatticeMapper<BaseFloat> >
271+ mapped_fst2 (fst2, mapper, mapfst_opts);
272+
273+ // sort lat1 on olabel.
274+ ArcSort (&lat1, fst::OLabelCompare<LatticeArc>());
275+
276+ Lattice composed_lat;
277+ if (phi_label > 0 ) {
278+ PhiCompose (lat1, mapped_fst2, phi_label, &composed_lat);
279+ } else if (rho_label > 0 ) {
280+ RhoCompose (lat1, mapped_fst2, rho_label, &composed_lat);
157281 } else {
158- Compose (lat1, lat2 , &lat_out );
282+ Compose (lat1, mapped_fst2 , &composed_lat );
159283 }
160- if (lat_out.Start () == fst::kNoStateId ) {
284+
285+ if (composed_lat.Start () == fst::kNoStateId ) {
161286 KALDI_WARN << " Empty lattice for utterance " << key << " (incompatible LM?)" ;
162287 n_fail++;
163288 } else {
164289 if (write_compact) {
165- CompactLattice clat_out ;
166- ConvertLattice (lat_out , &clat_out );
167- compact_lattice_writer.Write (key, clat_out );
290+ CompactLattice clat ;
291+ ConvertLattice (composed_lat , &clat );
292+ compact_lattice_writer.Write (key, clat );
168293 } else {
169- lattice_writer.Write (key, lat_out );
294+ lattice_writer.Write (key, composed_lat );
170295 }
171296 n_done++;
172297 }
173298 }
299+ } else {
300+ /* *
301+ * none of the 'if-else-if' applied...
302+ */
303+ KALDI_ERR << " You should never reach here..." ;
174304 }
175305
176306 KALDI_LOG << " Done " << n_done << " lattices; failed for "
0 commit comments