diff --git a/src/bin/latgen-faster-mapped-combine.cc b/src/bin/latgen-faster-mapped-combine.cc
new file mode 100644
index 00000000000..ae5946d9e8e
--- /dev/null
+++ b/src/bin/latgen-faster-mapped-combine.cc
@@ -0,0 +1,179 @@
+// bin/latgen-faster-mapped-combine.cc
+
+// Copyright 2009-2012  Microsoft Corporation, Karel Vesely
+//                2013  Johns Hopkins University (author: Daniel Povey)
+//                2014  Guoguo Chen
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/context-dep.h"
+#include "hmm/transition-model.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/decoder-wrappers.h"
+#include "decoder/decodable-matrix.h"
+#include "base/timer.h"
+
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+    using fst::SymbolTable;
+    using fst::Fst;
+    using fst::StdArc;
+
+    const char *usage =
+        "Generate lattices, reading log-likelihoods as matrices\n"
+        " (model is needed only for the integer mappings in its transition-model)\n"
+        "Usage: latgen-faster-mapped-combine [options] trans-model-in (fst-in|fsts-rspecifier) loglikes-rspecifier"
+        " lattice-wspecifier [ words-wspecifier [alignments-wspecifier] ]\n";
+    ParseOptions po(usage);
+    Timer timer;
+    bool allow_partial = false;
+    BaseFloat acoustic_scale = 0.1;
+    LatticeFasterDecoderCombineConfig config;
+
+    std::string word_syms_filename;
+    config.Register(&po);
+    po.Register("acoustic-scale", &acoustic_scale, "Scaling factor for acoustic likelihoods");
+
+    po.Register("word-symbol-table", &word_syms_filename, "Symbol table for words [for debug output]");
+    po.Register("allow-partial", &allow_partial, "If true, produce output even if end state was not reached.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() < 4 || po.NumArgs() > 6) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string model_in_filename = po.GetArg(1),
+        fst_in_str = po.GetArg(2),
+        feature_rspecifier = po.GetArg(3),
+        lattice_wspecifier = po.GetArg(4),
+        words_wspecifier = po.GetOptArg(5),
+        alignment_wspecifier = po.GetOptArg(6);
+
+    TransitionModel trans_model;
+    ReadKaldiObject(model_in_filename, &trans_model);
+
+    bool determinize = config.determinize_lattice;
+    CompactLatticeWriter compact_lattice_writer;
+    LatticeWriter lattice_writer;
+    if (! (determinize ? compact_lattice_writer.Open(lattice_wspecifier)
+           : lattice_writer.Open(lattice_wspecifier)))
+      KALDI_ERR << "Could not open table for writing lattices: "
+                << lattice_wspecifier;
+
+    Int32VectorWriter words_writer(words_wspecifier);
+
+    Int32VectorWriter alignment_writer(alignment_wspecifier);
+
+    fst::SymbolTable *word_syms = NULL;
+    if (word_syms_filename != "")
+      if (!(word_syms = fst::SymbolTable::ReadText(word_syms_filename)))
+        KALDI_ERR << "Could not read symbol table from file "
+                  << word_syms_filename;
+
+    double tot_like = 0.0;
+    kaldi::int64 frame_count = 0;
+    int num_success = 0, num_fail = 0;
+
+    if (ClassifyRspecifier(fst_in_str, NULL, NULL) == kNoRspecifier) {
+      SequentialBaseFloatMatrixReader loglike_reader(feature_rspecifier);
+      // Input FST is just one FST, not a table of FSTs.
+      Fst<StdArc> *decode_fst = fst::ReadFstKaldiGeneric(fst_in_str);
+      timer.Reset();
+
+      {
+        LatticeFasterDecoderCombine decoder(*decode_fst, config);
+
+        for (; !loglike_reader.Done(); loglike_reader.Next()) {
+          std::string utt = loglike_reader.Key();
+          Matrix<BaseFloat> loglikes (loglike_reader.Value());
+          loglike_reader.FreeCurrent();
+          if (loglikes.NumRows() == 0) {
+            KALDI_WARN << "Zero-length utterance: " << utt;
+            num_fail++;
+            continue;
+          }
+
+          DecodableMatrixScaledMapped decodable(trans_model, loglikes, acoustic_scale);
+
+          double like;
+          if (DecodeUtteranceLatticeFasterCombine(
+                  decoder, decodable, trans_model, word_syms, utt,
+                  acoustic_scale, determinize, allow_partial, &alignment_writer,
+                  &words_writer, &compact_lattice_writer, &lattice_writer,
+                  &like)) {
+            tot_like += like;
+            frame_count += loglikes.NumRows();
+            num_success++;
+          } else num_fail++;
+        }
+      }
+      delete decode_fst;  // delete this only after decoder goes out of scope.
+    } else {  // We have different FSTs for different utterances.
+      SequentialTableReader<fst::VectorFstHolder> fst_reader(fst_in_str);
+      RandomAccessBaseFloatMatrixReader loglike_reader(feature_rspecifier);
+      for (; !fst_reader.Done(); fst_reader.Next()) {
+        std::string utt = fst_reader.Key();
+        if (!loglike_reader.HasKey(utt)) {
+          KALDI_WARN << "Not decoding utterance " << utt
+                     << " because no loglikes available.";
+          num_fail++;
+          continue;
+        }
+        const Matrix<BaseFloat> &loglikes = loglike_reader.Value(utt);
+        if (loglikes.NumRows() == 0) {
+          KALDI_WARN << "Zero-length utterance: " << utt;
+          num_fail++;
+          continue;
+        }
+        LatticeFasterDecoderCombine decoder(fst_reader.Value(), config);
+        DecodableMatrixScaledMapped decodable(trans_model, loglikes, acoustic_scale);
+        double like;
+        if (DecodeUtteranceLatticeFasterCombine(
+                decoder, decodable, trans_model, word_syms, utt, acoustic_scale,
+                determinize, allow_partial, &alignment_writer, &words_writer,
+                &compact_lattice_writer, &lattice_writer, &like)) {
+          tot_like += like;
+          frame_count += loglikes.NumRows();
+          num_success++;
+        } else num_fail++;
+      }
+    }
+
+    double elapsed = timer.Elapsed();
+    KALDI_LOG << "Time taken "<< elapsed
+              << "s: real-time factor assuming 100 frames/sec is "
+              << (elapsed*100.0/frame_count);
+    KALDI_LOG << "Done " << num_success << " utterances, failed for "
+              << num_fail;
+    KALDI_LOG << "Overall log-likelihood per frame is " << (tot_like/frame_count) << " over "
+              << frame_count << " frames.";
+
+    delete word_syms;
+    if (num_success != 0) return 0;
+    else return 1;
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
diff --git a/src/decoder/lattice-faster-decoder-combine.cc b/src/decoder/lattice-faster-decoder-combine.cc
index 67c4bfe7e8e..63e7e31224b 100644
--- a/src/decoder/lattice-faster-decoder-combine.cc
+++ b/src/decoder/lattice-faster-decoder-combine.cc
@@ -892,6 +892,7 @@ void LatticeFasterDecoderCombineTpl<FST, Token>::ProcessForFrame(
   }  // end of while loop
   frame_processed_[frame] = true;
   frame_processed_[frame + 1] = false;
+  KALDI_VLOG(6) << "toks after: " << cur_toks_.size();
 }
diff --git a/src/decoder/lattice-faster-decoder-combine.h b/src/decoder/lattice-faster-decoder-combine.h
index f4d74a5acd8..1c27a4ecb84 100644
--- a/src/decoder/lattice-faster-decoder-combine.h
+++ b/src/decoder/lattice-faster-decoder-combine.h
@@ -32,6 +32,7 @@
 #include "lat/kaldi-lattice.h"
 #include "decoder/grammar-fst.h"
 #include "decoder/lattice-faster-decoder.h"
+#include "memory.h"
 
 namespace kaldi {
 
@@ -242,7 +243,8 @@ class LatticeFasterDecoderCombineTpl {
   using Weight = typename Arc::Weight;
   using ForwardLinkT = decodercombine::ForwardLink<Token>;
 
-  using StateIdToTokenMap = typename std::unordered_map<StateId, Token*>;
+  //using StateIdToTokenMap = typename std::unordered_map<StateId, Token*>;
+  using StateIdToTokenMap = typename std::unordered_map<StateId, Token*, std::hash<StateId>, std::equal_to<StateId>, fkaldi::PoolAllocator<std::pair<const StateId, Token*>>>;
   using IterType = typename StateIdToTokenMap::const_iterator;
 
   // Instantiate this class once for each thing you have to decode.
diff --git a/src/decoder/memory.h b/src/decoder/memory.h
new file mode 100644
index 00000000000..784712eb0c3
--- /dev/null
+++ b/src/decoder/memory.h
@@ -0,0 +1,421 @@
+// See www.openfst.org for extensive documentation on this weighted
+// finite-state transducer library.
+//
+// FST memory utilities.
+
+#ifndef FFST_MEMORY_H_
+#define FFST_MEMORY_H_
+
+#include <cstring>
+#include <list>
+#include <memory>
+#include <utility>
+
+#include <vector>
+#include <fst/types.h>
+#include <fst/log.h>
+
+namespace fkaldi {
+using namespace fst;
+
+// Default block allocation size.
+constexpr int kAllocSize = 64;
+
+// Minimum number of allocations per block.
+constexpr int kAllocFit = 4;
+
+// Base class for MemoryArena that allows (e.g.) MemoryArenaCollection to
+// easily manipulate collections of variously sized arenas.
+class MemoryArenaBase {
+ public:
+  virtual ~MemoryArenaBase() {}
+  virtual size_t Size() const = 0;
+};
+
+// Allocates 'size' uninitialized memory chunks of size sizeof(T) from underlying
+// blocks of (at least) size 'block_size * sizeof(T)'. All blocks are freed when
+// this class is deleted. Result of allocate() will be aligned to sizeof(T).
+template <typename T>
+class MemoryArena : public MemoryArenaBase {
+ public:
+  explicit MemoryArena(size_t block_size = kAllocSize)
+      : block_size_(block_size * sizeof(T)), block_pos_(0) {
+    blocks_.emplace_front(new char[block_size_]);
+  }
+
+  void *Allocate(size_t size) {
+    const auto byte_size = size * sizeof(T);
+    if (byte_size * kAllocFit > block_size_) {
+      // Large block; adds new large block.
+      auto *ptr = new char[byte_size];
+      blocks_.emplace_back(ptr);
+      return ptr;
+    }
+    if (block_pos_ + byte_size > block_size_) {
+      // Doesn't fit; adds new standard block.
+      auto *ptr = new char[block_size_];
+      block_pos_ = 0;
+      blocks_.emplace_front(ptr);
+    }
+    // Fits; uses current block.
+    auto *ptr = blocks_.front().get() + block_pos_;
+    block_pos_ += byte_size;
+    return ptr;
+  }
+
+  size_t Size() const override { return sizeof(T); }
+
+ private:
+  size_t block_size_;  // Default block size in bytes.
+  size_t block_pos_;   // Current position in block in bytes.
+  std::list<std::unique_ptr<char[]>> blocks_;  // List of allocated blocks.
+};
+
+// Base class for MemoryPool that allows (e.g.) MemoryPoolCollection to easily
+// manipulate collections of variously sized pools.
+class MemoryPoolBase {
+ public:
+  virtual ~MemoryPoolBase() {}
+  virtual size_t Size() const = 0;
+};
+
+// Allocates and frees initially uninitialized memory chunks of size sizeof(T).
+// Keeps an internal list of freed chunks that are reused (as is) on the next
+// allocation if available. Chunks are constructed in blocks of size
+// 'pool_size'. All memory is freed when the class is deleted. The result of
+// Allocate() will be suitably memory-aligned.
+//
+// Combined with placement operator new and destroy functions for the T class,
+// this can be used to improve allocation efficiency. See nlp/fst/lib/visit.h
+// (global new) and nlp/fst/lib/dfs-visit.h (class new) for examples.
+template <typename T>
+class MemoryPool : public MemoryPoolBase {
+ public:
+  struct Link {
+    char buf[sizeof(T)];
+    Link *next;
+  };
+
+  // 'pool_size' specifies the size of the initial pool and how it is extended.
+  explicit MemoryPool(size_t pool_size = kAllocSize)
+      : mem_arena_(pool_size), free_list_(nullptr) {}
+
+  void *Allocate() {
+    if (free_list_ == nullptr) {
+      auto *link = static_cast<Link *>(mem_arena_.Allocate(1));
+      link->next = nullptr;
+      return link;
+    } else {
+      auto *link = free_list_;
+      free_list_ = link->next;
+      return link;
+    }
+  }
+
+  void Free(void *ptr) {
+    if (ptr) {
+      auto *link = static_cast<Link *>(ptr);
+      link->next = free_list_;
+      free_list_ = link;
+    }
+  }
+
+  size_t Size() const override { return sizeof(T); }
+
+ private:
+  MemoryArena<Link> mem_arena_;
+  Link *free_list_;
+
+  MemoryPool(const MemoryPool &) = delete;
+  MemoryPool &operator=(const MemoryPool &) = delete;
+};
+
+// Stores a collection of memory arenas.
+class MemoryArenaCollection {
+ public:
+  // 'block_size' specifies the block size of the arenas.
+  explicit MemoryArenaCollection(size_t block_size = kAllocSize)
+      : block_size_(block_size), ref_count_(1) {}
+
+  template <typename T>
+  MemoryArena<T> *Arena() {
+    if (sizeof(T) >= arenas_.size()) arenas_.resize(sizeof(T) + 1);
+    MemoryArenaBase *arena = arenas_[sizeof(T)].get();
+    if (arena == nullptr) {
+      arena = new MemoryArena<T>(block_size_);
+      arenas_[sizeof(T)].reset(arena);
+    }
+    return static_cast<MemoryArena<T> *>(arena);
+  }
+
+  size_t BlockSize() const { return block_size_; }
+
+  size_t RefCount() const { return ref_count_; }
+
+  size_t IncrRefCount() { return ++ref_count_; }
+
+  size_t DecrRefCount() { return --ref_count_; }
+
+ private:
+  size_t block_size_;
+  size_t ref_count_;
+  std::vector<std::unique_ptr<MemoryArenaBase>> arenas_;
+};
+
+// Stores a collection of memory pools.
+class MemoryPoolCollection {
+ public:
+  // 'pool_size' specifies the size of initial pool and how it is extended.
+  explicit MemoryPoolCollection(size_t pool_size = kAllocSize)
+      : pool_size_(pool_size), ref_count_(1) {}
+
+  template <typename T>
+  MemoryPool<T> *Pool() {
+    if (sizeof(T) >= pools_.size()) pools_.resize(sizeof(T) + 1);
+    MemoryPoolBase *pool = pools_[sizeof(T)].get();
+    if (pool == nullptr) {
+      pool = new MemoryPool<T>(pool_size_);
+      pools_[sizeof(T)].reset(pool);
+    }
+    return static_cast<MemoryPool<T> *>(pool);
+  }
+
+  size_t PoolSize() const { return pool_size_; }
+
+  size_t RefCount() const { return ref_count_; }
+
+  size_t IncrRefCount() { return ++ref_count_; }
+
+  size_t DecrRefCount() { return --ref_count_; }
+
+ private:
+  size_t pool_size_;
+  size_t ref_count_;
+  std::vector<std::unique_ptr<MemoryPoolBase>> pools_;
+};
+
+// STL allocator using memory arenas. Memory is allocated from underlying
+// blocks of size 'block_size * sizeof(T)'. Memory is freed only when all
+// objects using this allocator are destroyed and there is otherwise no reuse
+// (unlike PoolAllocator).
+//
+// This allocator has object-local state so it should not be used with splicing
+// or swapping operations between objects created with different allocators nor
+// should it be used if copies must be thread-safe. The result of allocate()
+// will be suitably memory-aligned.
+template <typename T>
+class BlockAllocator {
+ public:
+  using Allocator = std::allocator<T>;
+  using size_type = typename Allocator::size_type;
+  using difference_type = typename Allocator::difference_type;
+  using pointer = typename Allocator::pointer;
+  using const_pointer = typename Allocator::const_pointer;
+  using reference = typename Allocator::reference;
+  using const_reference = typename Allocator::const_reference;
+  using value_type = typename Allocator::value_type;
+
+  template <typename U>
+  struct rebind {
+    using other = BlockAllocator<U>;
+  };
+
+  explicit BlockAllocator(size_t block_size = kAllocSize)
+      : arenas_(new MemoryArenaCollection(block_size)) {}
+
+  BlockAllocator(const BlockAllocator<T> &arena_alloc)
+      : arenas_(arena_alloc.Arenas()) {
+    Arenas()->IncrRefCount();
+  }
+
+  template <typename U>
+  explicit BlockAllocator(const BlockAllocator<U> &arena_alloc)
+      : arenas_(arena_alloc.Arenas()) {
+    Arenas()->IncrRefCount();
+  }
+
+  ~BlockAllocator() {
+    if (Arenas()->DecrRefCount() == 0) delete Arenas();
+  }
+
+  pointer address(reference ref) const { return Allocator().address(ref); }
+
+  const_pointer address(const_reference ref) const {
+    return Allocator().address(ref);
+  }
+
+  size_type max_size() const { return Allocator().max_size(); }
+
+  template <typename U, typename... Args>
+  void construct(U *p, Args &&... args) {
+    Allocator().construct(p, std::forward<Args>(args)...);
+  }
+
+  void destroy(pointer p) { Allocator().destroy(p); }
+
+  pointer allocate(size_type n, const void *hint = nullptr) {
+    if (n * kAllocFit <= kAllocSize) {
+      return static_cast<pointer>(Arena()->Allocate(n));
+    } else {
+      return Allocator().allocate(n, hint);
+    }
+  }
+
+  void deallocate(pointer p, size_type n) {
+    if (n * kAllocFit > kAllocSize) Allocator().deallocate(p, n);
+  }
+
+  MemoryArenaCollection *Arenas() const { return arenas_; }
+
+  //BlockAllocator<T> operator=(const BlockAllocator<T> &) { assert(0); }
+
+ private:
+  MemoryArena<T> *Arena() { return arenas_->Arena<T>(); }
+
+  MemoryArenaCollection *arenas_;
+};
+
+template <typename T, typename U>
+bool operator==(const BlockAllocator<T> &alloc1,
+                const BlockAllocator<U> &alloc2) {
+  return false;
+}
+
+template <typename T, typename U>
+bool operator!=(const BlockAllocator<T> &alloc1,
+                const BlockAllocator<U> &alloc2) {
+  return true;
+}
+
+// STL allocator using memory pools. Memory is allocated from underlying
+// blocks of size 'block_size * sizeof(T)'. Keeps an internal list of freed
+// chunks that are reused on the next allocation.
+//
+// This allocator has object-local state so it should not be used with splicing
+// or swapping operations between objects created with different allocators nor
+// should it be used if copies must be thread-safe. The result of allocate()
+// will be suitably memory-aligned.
+template <typename T>
+class PoolAllocator {
+ public:
+  using Allocator = std::allocator<T>;
+  using size_type = typename Allocator::size_type;
+  using difference_type = typename Allocator::difference_type;
+  using pointer = typename Allocator::pointer;
+  using const_pointer = typename Allocator::const_pointer;
+  using reference = typename Allocator::reference;
+  using const_reference = typename Allocator::const_reference;
+  using value_type = typename Allocator::value_type;
+
+  template <typename U>
+  struct rebind {
+    using other = PoolAllocator<U>;
+  };
+
+  explicit PoolAllocator(size_t pool_size = kAllocSize)
+      : pools_(new MemoryPoolCollection(pool_size)) {}
+
+  PoolAllocator(const PoolAllocator<T> &pool_alloc)
+      : pools_(pool_alloc.Pools()) {
+    Pools()->IncrRefCount();
+  }
+
+  template <typename U>
+  explicit PoolAllocator(const PoolAllocator<U> &pool_alloc)
+      : pools_(pool_alloc.Pools()) {
+    Pools()->IncrRefCount();
+  }
+
+  ~PoolAllocator() {
+    if (Pools()->DecrRefCount() == 0) delete Pools();
+  }
+
+  pointer address(reference ref) const { return Allocator().address(ref); }
+
+  const_pointer address(const_reference ref) const {
+    return Allocator().address(ref);
+  }
+
+  size_type max_size() const { return Allocator().max_size(); }
+
+  template <typename U, typename... Args>
+  void construct(U *p, Args &&... args) {
+    Allocator().construct(p, std::forward<Args>(args)...);
+  }
+
+  void destroy(pointer p) { Allocator().destroy(p); }
+
+  pointer allocate(size_type n, const void *hint = nullptr) {
+    if (n == 1) {
+      return static_cast<pointer>(Pool<1>()->Allocate());
+    } else if (n == 2) {
+      return static_cast<pointer>(Pool<2>()->Allocate());
+    } else if (n <= 4) {
+      return static_cast<pointer>(Pool<4>()->Allocate());
+    } else if (n <= 8) {
+      return static_cast<pointer>(Pool<8>()->Allocate());
+    } else if (n <= 16) {
+      return static_cast<pointer>(Pool<16>()->Allocate());
+    } else if (n <= 32) {
+      return static_cast<pointer>(Pool<32>()->Allocate());
+    } else if (n <= 64) {
+      return static_cast<pointer>(Pool<64>()->Allocate());
+    } else {
+      return Allocator().allocate(n, hint);
+    }
+  }
+
+  void deallocate(pointer p, size_type n) {
+    if (n == 1) {
+      Pool<1>()->Free(p);
+    } else if (n == 2) {
+      Pool<2>()->Free(p);
+    } else if (n <= 4) {
+      Pool<4>()->Free(p);
+    } else if (n <= 8) {
+      Pool<8>()->Free(p);
+    } else if (n <= 16) {
+      Pool<16>()->Free(p);
+    } else if (n <= 32) {
+      Pool<32>()->Free(p);
+    } else if (n <= 64) {
+      Pool<64>()->Free(p);
+    } else {
+      Allocator().deallocate(p, n);
+    }
+  }
+
+  MemoryPoolCollection *Pools() const { return pools_; }
+
+  //PoolAllocator<T> operator=(const PoolAllocator<T> &) { assert(0); }
+
+ private:
+  template <int n>
+  struct TN {
+    T buf[n];
+  };
+
+  template <int n>
+  MemoryPool<TN<n>> *Pool() {
+    return pools_->Pool<TN<n>>();
+  }
+
+  MemoryPoolCollection *pools_;
+};
+
+template <typename T, typename U>
+bool operator==(const PoolAllocator<T> &alloc1,
+                const PoolAllocator<U> &alloc2) {
+  return false;
+}
+
+template <typename T, typename U>
+bool operator!=(const PoolAllocator<T> &alloc1,
+                const PoolAllocator<U> &alloc2) {
+  return true;
+}
+
+}  // namespace fkaldi
+
+#endif  // FFST_MEMORY_H_
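
For reference, below is a minimal, self-contained sketch (not part of the patch) of how the fkaldi::PoolAllocator introduced above can back an std::unordered_map, mirroring the new StateIdToTokenMap alias in lattice-faster-decoder-combine.h. The StateId/Token typedefs are illustrative stand-ins for the decoder's real types, "decoder/memory.h" is an assumed include path, and a pre-C++20 toolchain is assumed since the allocator relies on the legacy std::allocator member typedefs.

// Usage sketch: a pool-allocated token map like the decoder's StateIdToTokenMap.
// StateId/Token are stand-ins (hypothetical); include path is an assumption.
#include <cstdint>
#include <functional>
#include <unordered_map>
#include <utility>

#include "decoder/memory.h"

int main() {
  typedef int32_t StateId;
  typedef int Token;   // the decoder stores Token* values keyed by FST state

  typedef std::unordered_map<
      StateId, Token*, std::hash<StateId>, std::equal_to<StateId>,
      fkaldi::PoolAllocator<std::pair<const StateId, Token*> > > TokenMap;

  TokenMap toks;
  Token a = 0, b = 1;
  toks[3] = &a;            // node memory comes from a size-bucketed pool
  toks[7] = &b;
  toks.erase(3);           // the freed node goes onto the pool's free list ...
  toks[11] = &a;           // ... and is reused by a later insertion
  return toks.size() == 2 ? 0 : 1;
}

The intended saving is that the per-frame token maps churn through many small node allocations; routing them through the pool's free list avoids repeated malloc/free of identically sized chunks.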