diff --git a/srcbpatch/streamreplacer.cpp b/srcbpatch/streamreplacer.cpp index ae27e38..c57b9f9 100644 --- a/srcbpatch/streamreplacer.cpp +++ b/srcbpatch/streamreplacer.cpp @@ -66,6 +66,51 @@ class ReplacerWithNext: public StreamReplacer }; +/// +/// a class with some common methods for all replacers +/// +class BaseReplacer: public ReplacerWithNext +{ +protected: + /// + /// Sends target to the next replacer, one character at a time + /// + /// the array we need to send + void SendFurther(const span& target) const + { + for (const char c : target) + { + pNext_->DoReplacements(c, false); + } + } +}; + +/// +/// a class with some common methods for all replacers including cache +/// +class BaseReplacerWithCache: public BaseReplacer +{ +protected: + /// + /// Clean srcMatchedLength bytes of cache from the beginning + /// + /// number of bytes we have to clear + void CleanTheCache(size_t srcMatchedLength) const + { + shift_left(cachedData_.data(), + cachedData_.data() + cachedAmount_, + static_cast::difference_type>(srcMatchedLength)); + cachedAmount_ -= srcMatchedLength; + } + +protected: + mutable size_t cachedAmount_ = 0; // we cached this amount of data + + // this is used to hold temporary data while the logic is + // looking for the new beginning of the cached value + mutable vector cachedData_; +}; + //-------------------------------------------------- struct ReplacerPairHolder @@ -81,8 +126,9 @@ struct ReplacerPairHolder }; -//-------------------------------------------------- -class UsualReplacer final : public ReplacerWithNext +//Description???? 
+/// The class finds the lexeme src_ and replaces it with trg_; src_ and trg_ are non-empty strings +class UsualReplacer final : public BaseReplacerWithCache { public: UsualReplacer(unique_ptr& src, // what to replace @@ -96,14 +142,18 @@ class UsualReplacer final : public ReplacerWithNext void DoReplacements(const char toProcess, const bool aEod) const override; protected: + /// + /// We got the 'end' character so there is no match -> we should pass all the cache further + /// + /// character received along with end of data sign + void DoReplacementsAtTheEndOfTheData(const char toProcess) const + { + SendFurther(std::span (cachedData_.data(), cachedAmount_)); + CleanTheCache(cachedAmount_); + pNext_->DoReplacements(toProcess, true); + } const span& src_; // what to replace const span& trg_; // with what - - mutable size_t cachedAmount_ = 0; // we cached this amount of data - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; @@ -114,50 +164,25 @@ void UsualReplacer::DoReplacements(const char toProcess, const bool aEod) const throw logic_error("Replacement chain has been broken. 
Communicate with maintainer"); } - // no more data - // just send cached amount - if (aEod) + if (aEod) [[unlikely]] { - for (size_t i = 0; i < cachedAmount_; ++i) - { - pNext_->DoReplacements(src_[i], false); - } - cachedAmount_ = 0; - pNext_->DoReplacements(toProcess, true); + DoReplacementsAtTheEndOfTheData(toProcess); return; } - if (src_[cachedAmount_] == toProcess) // check for match + cachedData_[cachedAmount_++] = toProcess; + // our cachedData_ should contain only prefix of src_, otherwise -> clean the cache from the beginning + while (cachedAmount_ > 0 && memcmp(cachedData_.data(), src_.data(), cachedAmount_) != 0) { - if (++cachedAmount_ >= src_.size()) - {// send target - do replacement - for (size_t q = 0; q < trg_.size(); ++q) { pNext_->DoReplacements(trg_[q], false); } - cachedAmount_ = 0; - } - return; + SendFurther(std::span (cachedData_.data(), 1)); + CleanTheCache(1); } - // here: toProcess is not our char - // lets check for fast track (255/256 probability) - if (0 == cachedAmount_) + if (cachedAmount_ == src_.size()) { - pNext_->DoReplacements(toProcess, false); - return; + SendFurther(trg_); + CleanTheCache(cachedAmount_); } - - // here: We have some cached data - // at least 1 char need to be send further - // remaining cached data including toProcess need to be reprocessed for match - - memcpy(cachedData_.data(), src_.data(), cachedAmount_); - cachedData_[cachedAmount_++]= toProcess; - size_t i = 0; - do - { - pNext_->DoReplacements(cachedData_[i++], false); // send 1 byte after another - } while (0 != memcmp(src_.data(), cachedData_.data() + i, --cachedAmount_)); - // Everything that was needed has already been sent - // cachedAmount_ is zero or greater } @@ -180,8 +205,9 @@ static unique_ptr CreateSimpleReplacer( /// |--SRC 1 TRG 1 | /// O - |-- ... | - o /// |--SRC N TRG N | -/// -class ChoiceReplacer final : public ReplacerWithNext +/// +/// Description???? 
+class ChoiceReplacer final : public BaseReplacerWithCache { typedef struct { @@ -257,25 +283,10 @@ class ChoiceReplacer final : public ReplacerWithNext /// the array we need to send void SendAndResetPartialMatch(const span& target) const { - for (const char c : target) - { - pNext_->DoReplacements(c, false); - } + SendFurther(target); indexOfPartialMatch_ = 0; } - /// - /// Clean srcMatchedLength bytes of cache from the beginning - /// - /// number of bytes we have to clear - void CleanTheCache(size_t srcMatchedLength) const - { - shift_left(cachedData_.data(), - cachedData_.data() + cachedAmount_, - static_cast::difference_type>(srcMatchedLength)); - cachedAmount_ -= srcMatchedLength; - } - /// /// The end of the data sign has been received and the cached data need to be either send or replaced & send /// @@ -304,12 +315,7 @@ class ChoiceReplacer final : public ReplacerWithNext // our pairs sorted by priority - only one of them could be replaced for concrete pos vector rpairs_; - mutable size_t cachedAmount_ = 0; // we cached this amount of data mutable size_t indexOfPartialMatch_ = 0; // this index from rpairs_ represents last partial match - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; void ChoiceReplacer::DoReplacements(const char toProcess, const bool aEod) const @@ -356,11 +362,10 @@ namespace /// /// replaces for lexemes of the same length /// -class UniformLexemeReplacer final : public ReplacerWithNext +class UniformLexemeReplacer final : public BaseReplacerWithCache { public: UniformLexemeReplacer(StreamReplacerChoice& choice, const size_t sz) - : cachedData_(sz) { for (AbstractLexemesPair& alpair : choice) { @@ -375,19 +380,24 @@ class UniformLexemeReplacer final : public ReplacerWithNext cout << coloredconsole::toconsole(warningDuplicatePattern) << endl; } } + cachedData_.resize(sz); } void DoReplacements(const char toProcess, const bool aEod) const 
override; protected: + /// + /// We got the 'end' character so there is no match -> we should pass all the cache further + /// + /// character received along with end of data sign + void DoReplacementsAtTheEndOfTheData(const char toProcess) const + { + SendFurther(std::span (cachedData_.data(), cachedAmount_)); + CleanTheCache(cachedAmount_); + pNext_->DoReplacements(toProcess, true); + } // here we hold pairs of sources and targets unordered_map replaceOptions_; - - mutable size_t cachedAmount_ = 0; // we cache this amount of data in the cachedData_ - - // this is used to hold temporary data while the logic is - // looking for the new beginning of the cached value - mutable vector cachedData_; }; @@ -399,33 +409,26 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod } // no more data - if (aEod) + if (aEod) [[unlikely]] { - if (cachedAmount_ > 0) - { - for (size_t q = 0; q < cachedAmount_; ++q) { pNext_->DoReplacements(cachedData_[q], false); } - cachedAmount_ = 0; - } - pNext_->DoReplacements(toProcess, aEod); // send end of the data further + DoReplacementsAtTheEndOfTheData(toProcess); return; - } // if (aEod) - + } // set buffer of cached at once - char* const& pBuffer = cachedData_.data(); - pBuffer[cachedAmount_++] = toProcess; - if (cachedAmount_ >= cachedData_.size()) - { - if (const auto it = replaceOptions_.find(string_view(pBuffer, cachedAmount_)); it != replaceOptions_.cend()) - { // found - string_view trg = it->second; - for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); } - cachedAmount_ = 0; + cachedData_[cachedAmount_++] = toProcess; + if (cachedAmount_ == cachedData_.size()) + { + if (const auto matchIt = replaceOptions_.find(string_view( cachedData_.data(), cachedAmount_)); matchIt != replaceOptions_.cend()) + { + string_view trg = matchIt->second; + SendFurther(trg); + CleanTheCache(cachedAmount_); } else - { // not found - pNext_->DoReplacements(pBuffer[0], false); // send 1 char - 
std::shift_left(pBuffer, pBuffer + cachedAmount_--, 1); + { + SendFurther(std::span (cachedData_.data(), 1)); + CleanTheCache(1); } } } @@ -435,7 +438,7 @@ void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod /// /// replaces for lexemes of the same length /// -class LexemeOf1Replacer final : public ReplacerWithNext +class LexemeOf1Replacer final : public BaseReplacer { public: LexemeOf1Replacer(StreamReplacerChoice& choice) @@ -484,7 +487,7 @@ void LexemeOf1Replacer::DoReplacements(const char toProcess, const bool aEod) co if (replaces_[index].present_) { auto& trg = replaces_[index].trg_; - for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); } + SendFurther(trg); } else {