Skip to content

Commit 0757bad

Browse files
committed
perf(core): revert mutex to optimize single-thread encoding speed
1 parent 1373567 commit 0757bad

File tree

1 file changed

+4 -11 lines changed

1 file changed

+4 -11 lines changed

src/tokenizer.cpp

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
#include <oniguruma.h>
1717
#include <utf8proc/utf8proc.h>
1818
#include <iostream>
19-
#include <mutex>
2019
#include "ujson.hpp"
2120
#include "jinja.hpp"
2221

@@ -518,7 +517,6 @@ class BPEModel : public Model {
518517
std::unordered_map<std::string, int> vocab_;
519518
std::unordered_map<int, std::string> id_to_token_;
520519
std::unordered_map<std::pair<int, int>, int, PairHash> merges_;
521-
mutable std::mutex cache_mutex_;
522520
mutable std::unordered_map<std::string, std::vector<int>> cache_;
523521

524522
BPEModel(const std::map<std::string, int>& vocab,
@@ -543,11 +541,9 @@ class BPEModel : public Model {
543541

544542
std::vector<int> tokenize(const std::string& text) const override {
545543
if (text.empty()) return {};
546-
{
547-
std::lock_guard<std::mutex> lock(cache_mutex_);
548-
auto cit = cache_.find(text);
549-
if (cit != cache_.end()) return cit->second;
550-
}
544+
auto cit = cache_.find(text);
545+
if (cit != cache_.end()) return cit->second;
546+
551547
std::vector<int> out;
552548
if (use_byte_level_) {
553549
static auto byte_map = create_bytes_char_map();
@@ -589,10 +585,7 @@ class BPEModel : public Model {
589585
int nid = token_to_id(m); if (nid == -1) break;
590586
out[best] = nid; out.erase(out.begin() + best + 1);
591587
}
592-
{
593-
std::lock_guard<std::mutex> lock(cache_mutex_);
594-
cache_[text] = out;
595-
}
588+
cache_[text] = out;
596589
return out;
597590
}
598591

0 commit comments

Comments
 (0)