diff --git a/CMakeLists.txt b/CMakeLists.txt index bd1ef085..342828ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ project(OpenNMTTokenizer) option(BUILD_TESTS "Compile unit tests" OFF) option(BUILD_SHARED_LIBS "Build shared libraries" ON) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 17) if(CMAKE_VERSION VERSION_LESS "3.7.0") set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") endif() diff --git a/src/SentencePiece.cc b/src/SentencePiece.cc index 5c7cf5f9..8c0afae8 100644 --- a/src/SentencePiece.cc +++ b/src/SentencePiece.cc @@ -54,7 +54,12 @@ namespace onmt if (options && (options->joiner_annotate || options->spacer_new)) throw std::invalid_argument("SentencePiece vocabulary restriction requires the tokenization " "to use \"spacer_annotate\" (same as spm_encode)"); - auto status = _processor->SetVocabulary(vocabulary); + std::vector<std::string_view> vocabulary_views; + vocabulary_views.reserve(vocabulary.size()); + for (const auto& s : vocabulary) { + vocabulary_views.emplace_back(s); + } + auto status = _processor->SetVocabulary(vocabulary_views); if (!status.ok()) throw std::invalid_argument(status.ToString()); } diff --git a/third_party/sentencepiece b/third_party/sentencepiece index 17d7580d..27344904 160000 --- a/third_party/sentencepiece +++ b/third_party/sentencepiece @@ -1 +1 @@ -Subproject commit 17d7580d6407802f85855d2cc9190634e2c95624 +Subproject commit 273449044caa593c2fd7eb7550cb3ab2cff93f1a