Skip to content

Commit a4fd3f3

Browse files
authored
Add some additional API for context words and only keep words based on max lm order (#114)
1 parent bb8a61a commit a4fd3f3

File tree

5 files changed

+66
-3
lines changed

5 files changed

+66
-3
lines changed

src/libime/core/languagemodel.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ LanguageModel::LanguageModel(
148148

149149
// Out-of-line destructor with an empty body.
LanguageModel::~LanguageModel() {}
150150

151+
// Reports the maximum language model order. This simply returns
// KENLM_MAX_ORDER, which is defined outside this view.
size_t LanguageModel::maxOrder() { return KENLM_MAX_ORDER; }
152+
151153
std::shared_ptr<const StaticLanguageModelFile>
152154
LanguageModel::languageModelFile() const {
153155
FCITX_D();

src/libime/core/languagemodel.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ class LIBIMECORE_EXPORT LanguageModel : public LanguageModelBase {
7373
std::shared_ptr<const StaticLanguageModelFile> file = nullptr);
7474
virtual ~LanguageModel();
7575

76+
/**
 * Maximum language model order.
 * @return the value of KENLM_MAX_ORDER.
 */
static size_t maxOrder();
77+
7678
std::shared_ptr<const StaticLanguageModelFile> languageModelFile() const;
7779

7880
WordIndex beginSentence() const override;

src/libime/pinyin/pinyincontext.cpp

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <functional>
1111
#include <iterator>
1212
#include <limits>
13+
#include <list>
1314
#include <memory>
1415
#include <span>
1516
#include <stdexcept>
@@ -81,7 +82,7 @@ class PinyinContextPrivate : public fcitx::QPtrHolder<PinyinContext> {
8182
mutable std::vector<SentenceResult> candidatesToCursor_;
8283
mutable std::unordered_set<std::string> candidatesToCursorSet_;
8384
std::vector<fcitx::ScopedConnection> conn_;
84-
std::vector<WordNode> contextWords_;
85+
std::list<WordNode> contextWords_;
8586

8687
size_t alignCursorToNextSegment() const {
8788
FCITX_Q();
@@ -988,10 +989,38 @@ void PinyinContext::setContextWords(
988989
const std::vector<std::string> &contextWords) {
989990
FCITX_D();
990991
d->contextWords_.clear();
991-
for (const auto &word : contextWords) {
992+
appendContextWords(contextWords);
993+
}
994+
995+
// Discards every context word currently stored, leaving the list empty.
void PinyinContext::clearContextWords() {
996+
FCITX_D();
997+
d->contextWords_.clear();
998+
}
999+
1000+
// Appends the given words to the stored context, then trims the stored
// context so that at most LanguageModel::maxOrder() - 1 words remain.
// The oldest words (at the front of the list) are the ones dropped.
void PinyinContext::appendContextWords(
1001+
const std::vector<std::string> &contextWords) {
1002+
FCITX_D();
1003+
1004+
// Upper bound on how many context words are kept.
size_t needed = LanguageModel::maxOrder() - 1;
1005+
1006+
// Only the trailing `needed` input words can survive the trim below, so
// earlier input words are not converted to WordNode at all.
for (const auto &word :
1007+
std::span{contextWords}.last(std::min(contextWords.size(), needed))) {
9921008
d->contextWords_.push_back(
9931009
WordNode(word, d->ime_->model()->index(word)));
9941010
}
1011+
// Drop the oldest entries until the list fits within the bound.
while (d->contextWords_.size() > needed) {
1012+
d->contextWords_.pop_front();
1013+
}
1014+
}
1015+
1016+
// Returns a copy of the stored context words as plain strings, in the same
// order they are held in the internal list (front to back).
std::vector<std::string> PinyinContext::contextWords() const {
1017+
FCITX_D();
1018+
std::vector<std::string> words;
1019+
// Pre-size the result to avoid reallocation while copying.
words.reserve(d->contextWords_.size());
1020+
for (const auto &word : d->contextWords_) {
1021+
words.push_back(word.word());
1022+
}
1023+
return words;
9951024
}
9961025

9971026
// This implementation performs no work and always returns false.
bool PinyinContext::learnWord() { return false; }

src/libime/pinyin/pinyincontext.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,26 @@ class LIBIMEPINYIN_EXPORT PinyinContext : public InputBuffer {
144144
*/
145145
void setContextWords(const std::vector<std::string> &contextWords);
146146

147+
/**
148+
* Clear context words, removing every word previously set or appended.
149+
* @since 1.1.13
150+
*/
151+
void clearContextWords();
152+
153+
/**
154+
* Append context words for better prediction.
*
* After appending, only the most recent LanguageModel::maxOrder() - 1
* words are kept; older words are dropped.
155+
* @param contextWords The context words.
156+
* @since 1.1.13
157+
*/
158+
void appendContextWords(const std::vector<std::string> &contextWords);
159+
160+
/**
161+
* Get the context words currently held, oldest first.
162+
* @return current context words
163+
* @since 1.1.13
164+
*/
165+
std::vector<std::string> contextWords() const;
166+
147167
protected:
148168
bool typeImpl(const char *s, size_t length) override;
149169

test/testpinyincontext.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#include <memory>
1111
#include <ostream>
1212
#include <sstream>
13+
#include <string>
1314
#include <string_view>
15+
#include <vector>
1416
#include <fcitx-utils/log.h>
1517
#include "libime/core/historybigram.h"
1618
#include "libime/core/lattice.h"
@@ -262,7 +264,15 @@ int main() {
262264
}
263265
{
264266
c.clear();
265-
c.setContextWords({"", ""});
267+
c.setContextWords({"", "", "知道"});
268+
FCITX_ASSERT(c.contextWords() ==
269+
std::vector<std::string>{"", "知道"});
270+
c.setContextWords({"", ""});
271+
FCITX_ASSERT(c.contextWords() ==
272+
std::vector<std::string>{"", ""});
273+
c.appendContextWords({""});
274+
FCITX_ASSERT(c.contextWords() ==
275+
std::vector<std::string>{"", ""});
266276
c.type("ta");
267277
size_t i = 0;
268278
for (const auto &candidate : c.candidatesToCursor()) {

0 commit comments

Comments
 (0)