Skip to content

Commit 7e3cdce

Browse files
Merge pull request #3 from arangodb/feature/icu-namespace
Feature/icu namespace
2 parents 207b0b9 + da8d0fd commit 7e3cdce

14 files changed

+196 additions, -107 deletions (lines changed)

core/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -428,6 +428,10 @@ if (CLANG_TIDY_EXE)
428428
)
429429
endif ()
430430

431+
if (NOT ("${IRESEARCH_EXTERNAL_DEPENDENCIES}" STREQUAL ""))
432+
add_dependencies(iresearch-static ${IRESEARCH_EXTERNAL_DEPENDENCIES})
433+
endif()
434+
431435
################################################################################
432436
### analysis plugin : delimiter
433437
################################################################################
@@ -503,6 +507,10 @@ target_link_libraries(iresearch-analyzer-text-static
503507
stemmer-static
504508
)
505509

510+
if (NOT ("${IRESEARCH_EXTERNAL_DEPENDENCIES}" STREQUAL ""))
511+
add_dependencies(iresearch-analyzer-text-static ${IRESEARCH_EXTERNAL_DEPENDENCIES})
512+
endif()
513+
506514
################################################################################
507515
### analysis plugin : collation token stream
508516
################################################################################
@@ -530,6 +538,10 @@ target_link_libraries(iresearch-analyzer-collation-static
530538
iresearch-static
531539
)
532540

541+
if (NOT ("${IRESEARCH_EXTERNAL_DEPENDENCIES}" STREQUAL ""))
542+
add_dependencies(iresearch-analyzer-collation-static ${IRESEARCH_EXTERNAL_DEPENDENCIES})
543+
endif()
544+
533545
################################################################################
534546
### analysis plugin : minhash
535547
################################################################################
@@ -579,6 +591,10 @@ target_link_libraries(iresearch-analyzer-norm-static
579591
iresearch-static
580592
)
581593

594+
if (NOT ("${IRESEARCH_EXTERNAL_DEPENDENCIES}" STREQUAL ""))
595+
add_dependencies(iresearch-analyzer-norm-static ${IRESEARCH_EXTERNAL_DEPENDENCIES})
596+
endif()
597+
582598
################################################################################
583599
### analysis plugin : text token stemming
584600
################################################################################
@@ -609,6 +625,10 @@ target_link_libraries(iresearch-analyzer-stem-static
609625
stemmer-static
610626
)
611627

628+
if (NOT ("${IRESEARCH_EXTERNAL_DEPENDENCIES}" STREQUAL ""))
629+
add_dependencies(iresearch-analyzer-stem-static ${IRESEARCH_EXTERNAL_DEPENDENCIES})
630+
endif()
631+
612632
################################################################################
613633
### analysis plugin : token stopwords
614634
################################################################################

core/analysis/collation_token_stream.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ using namespace irs;
4141

4242
constexpr std::string_view LOCALE_PARAM_NAME{"locale"};
4343

44-
bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
44+
bool locale_from_slice(VPackSlice slice,
45+
IRESEARCH_ICU_NAMESPACE::Locale& locale) {
4546
if (!slice.isString()) {
4647
IRS_LOG_WARN(absl::StrCat(
4748
"Non-string value in '", LOCALE_PARAM_NAME,
@@ -52,7 +53,8 @@ bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
5253

5354
const auto locale_name = slice.copyString();
5455

55-
locale = icu::Locale::createCanonical(locale_name.c_str());
56+
locale =
57+
IRESEARCH_ICU_NAMESPACE::Locale::createCanonical(locale_name.c_str());
5658

5759
if (locale.isBogus()) {
5860
IRS_LOG_WARN(absl::StrCat(
@@ -62,10 +64,10 @@ bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
6264
return false;
6365
}
6466

65-
// validate creation of icu::Collator
67+
// validate creation of IRESEARCH_ICU_NAMESPACE::Collator
6668
auto err = UErrorCode::U_ZERO_ERROR;
67-
std::unique_ptr<icu::Collator> collator{
68-
icu::Collator::createInstance(locale, err)};
69+
std::unique_ptr<IRESEARCH_ICU_NAMESPACE::Collator> collator{
70+
IRESEARCH_ICU_NAMESPACE::Collator::createInstance(locale, err)};
6971

7072
if (!collator) {
7173
IRS_LOG_WARN(absl::StrCat("Can't instantiate icu::Collator from locale: ",
@@ -223,7 +225,7 @@ constexpr size_t MAX_TOKEN_SIZE = 1 << 15;
223225

224226
struct collation_token_stream::state_t {
225227
const options_t options;
226-
std::unique_ptr<icu::Collator> collator;
228+
std::unique_ptr<IRESEARCH_ICU_NAMESPACE::Collator> collator;
227229
byte_type term_buf[MAX_TOKEN_SIZE];
228230

229231
explicit state_t(const options_t& opts) : options(opts) {}
@@ -247,8 +249,8 @@ collation_token_stream::collation_token_stream(const options_t& options)
247249
bool collation_token_stream::reset(std::string_view data) {
248250
if (!state_->collator) {
249251
auto err = UErrorCode::U_ZERO_ERROR;
250-
state_->collator.reset(
251-
icu::Collator::createInstance(state_->options.locale, err));
252+
state_->collator.reset(IRESEARCH_ICU_NAMESPACE::Collator::createInstance(
253+
state_->options.locale, err));
252254

253255
if (!U_SUCCESS(err) || !state_->collator) {
254256
state_->collator.reset();
@@ -262,8 +264,10 @@ bool collation_token_stream::reset(std::string_view data) {
262264
return false; // ICU UnicodeString signatures can handle at most INT32_MAX
263265
}
264266

265-
const icu::UnicodeString icu_token = icu::UnicodeString::fromUTF8(
266-
icu::StringPiece(data.data(), static_cast<int32_t>(data.size())));
267+
const IRESEARCH_ICU_NAMESPACE::UnicodeString icu_token =
268+
IRESEARCH_ICU_NAMESPACE::UnicodeString::fromUTF8(
269+
IRESEARCH_ICU_NAMESPACE::StringPiece(data.data(),
270+
static_cast<int32_t>(data.size())));
267271

268272
byte_type raw_term_buf[MAX_TOKEN_SIZE];
269273
static_assert(sizeof raw_term_buf == sizeof state_->term_buf);

core/analysis/collation_token_stream.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222

2323
#pragma once
2424

25+
#ifndef IRESEARCH_ICU_NAMESPACE
26+
#define IRESEARCH_ICU_NAMESPACE icu
27+
#endif
28+
2529
#include <unicode/locid.h>
2630

2731
#include "analyzers.hpp"
@@ -42,7 +46,7 @@ class collation_token_stream final
4246
private util::noncopyable {
4347
public:
4448
struct options_t {
45-
icu::Locale locale;
49+
IRESEARCH_ICU_NAMESPACE::Locale locale;
4650
bool forceUtf8;
4751

4852
options_t() : locale{"C"}, forceUtf8{true} { locale.setToBogus(); }

core/analysis/text_token_normalizing_stream.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,12 @@ namespace analysis {
5353
// -----------------------------------------------------------------------------
5454

5555
struct normalizing_token_stream::state_t {
56-
icu::UnicodeString data;
57-
icu::UnicodeString token;
56+
IRESEARCH_ICU_NAMESPACE::UnicodeString data;
57+
IRESEARCH_ICU_NAMESPACE::UnicodeString token;
5858
std::string term_buf;
59-
const icu::Normalizer2* normalizer; // reusable object owned by ICU
60-
std::unique_ptr<icu::Transliterator> transliterator;
59+
const IRESEARCH_ICU_NAMESPACE::Normalizer2*
60+
normalizer; // reusable object owned by ICU
61+
std::unique_ptr<IRESEARCH_ICU_NAMESPACE::Transliterator> transliterator;
6162
const options_t options;
6263

6364
explicit state_t(const options_t& opts) : normalizer{}, options{opts} {}
@@ -82,7 +83,8 @@ constexpr frozen::unordered_map<
8283
{"upper", analysis::normalizing_token_stream::UPPER},
8384
};
8485

85-
bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
86+
bool locale_from_slice(VPackSlice slice,
87+
IRESEARCH_ICU_NAMESPACE::Locale& locale) {
8688
if (!slice.isString()) {
8789
IRS_LOG_WARN(
8890
absl::StrCat("Non-string value in '", LOCALE_PARAM_NAME,
@@ -94,11 +96,11 @@ bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
9496

9597
const auto locale_name = slice.copyString();
9698

97-
locale = icu::Locale::createFromName(locale_name.c_str());
99+
locale = IRESEARCH_ICU_NAMESPACE::Locale::createFromName(locale_name.c_str());
98100

99101
if (!locale.isBogus()) {
100-
locale = icu::Locale{locale.getLanguage(), locale.getCountry(),
101-
locale.getVariant()};
102+
locale = IRESEARCH_ICU_NAMESPACE::Locale{
103+
locale.getLanguage(), locale.getCountry(), locale.getVariant()};
102104
}
103105

104106
if (locale.isBogus()) {
@@ -349,7 +351,8 @@ bool normalizing_token_stream::reset(std::string_view data) {
349351

350352
if (!state_->normalizer) {
351353
// reusable object owned by ICU
352-
state_->normalizer = icu::Normalizer2::getNFCInstance(err);
354+
state_->normalizer =
355+
IRESEARCH_ICU_NAMESPACE::Normalizer2::getNFCInstance(err);
353356

354357
if (!U_SUCCESS(err) || !state_->normalizer) {
355358
state_->normalizer = nullptr;
@@ -362,12 +365,13 @@ bool normalizing_token_stream::reset(std::string_view data) {
362365
// transliteration rule taken verbatim from:
363366
// http://userguide.icu-project.org/transforms/general do not allocate
364367
// statically since it causes memory leaks in ICU
365-
const icu::UnicodeString collationRule(
368+
const IRESEARCH_ICU_NAMESPACE::UnicodeString collationRule(
366369
"NFD; [:Nonspacing Mark:] Remove; NFC");
367370

368371
// reusable object owned by *this
369-
state_->transliterator.reset(icu::Transliterator::createInstance(
370-
collationRule, UTransDirection::UTRANS_FORWARD, err));
372+
state_->transliterator.reset(
373+
IRESEARCH_ICU_NAMESPACE::Transliterator::createInstance(
374+
collationRule, UTransDirection::UTRANS_FORWARD, err));
371375

372376
if (!U_SUCCESS(err) || !state_->transliterator) {
373377
state_->transliterator.reset();
@@ -382,8 +386,9 @@ bool normalizing_token_stream::reset(std::string_view data) {
382386
return false;
383387
}
384388

385-
state_->data = icu::UnicodeString::fromUTF8(
386-
icu::StringPiece{data.data(), static_cast<int32_t>(data.size())});
389+
state_->data = IRESEARCH_ICU_NAMESPACE::UnicodeString::fromUTF8(
390+
IRESEARCH_ICU_NAMESPACE::StringPiece{data.data(),
391+
static_cast<int32_t>(data.size())});
387392

388393
// normalize unicode
389394
state_->normalizer->normalize(state_->data, state_->token, err);

core/analysis/text_token_normalizing_stream.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323

2424
#pragma once
2525

26+
#ifndef IRESEARCH_ICU_NAMESPACE
27+
#define IRESEARCH_ICU_NAMESPACE icu
28+
#endif
29+
2630
#include <unicode/locid.h>
2731

2832
#include "analyzers.hpp"
@@ -45,7 +49,7 @@ class normalizing_token_stream final
4549
enum case_convert_t { LOWER, NONE, UPPER };
4650

4751
struct options_t {
48-
icu::Locale locale;
52+
IRESEARCH_ICU_NAMESPACE::Locale locale;
4953
case_convert_t case_convert{
5054
case_convert_t::NONE}; // no extra normalization
5155
bool accent{true}; // no extra normalization

core/analysis/text_token_stemming_stream.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ using namespace irs;
3939

4040
constexpr std::string_view LOCALE_PARAM_NAME{"locale"};
4141

42-
bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
42+
bool locale_from_slice(VPackSlice slice,
43+
IRESEARCH_ICU_NAMESPACE::Locale& locale) {
4344
if (!slice.isString()) {
4445
IRS_LOG_WARN(absl::StrCat(
4546
"Non-string value in '", LOCALE_PARAM_NAME,
@@ -50,10 +51,10 @@ bool locale_from_slice(VPackSlice slice, icu::Locale& locale) {
5051

5152
const auto locale_name = slice.copyString();
5253

53-
locale = icu::Locale::createFromName(locale_name.c_str());
54+
locale = IRESEARCH_ICU_NAMESPACE::Locale::createFromName(locale_name.c_str());
5455

5556
if (!locale.isBogus()) {
56-
locale = icu::Locale{locale.getLanguage()};
57+
locale = IRESEARCH_ICU_NAMESPACE::Locale{locale.getLanguage()};
5758
}
5859

5960
if (locale.isBogus()) {

core/analysis/text_token_stemming_stream.hpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323

2424
#pragma once
2525

26+
#ifndef IRESEARCH_ICU_NAMESPACE
27+
#define IRESEARCH_ICU_NAMESPACE icu
28+
#endif
29+
2630
#include <unicode/locid.h>
2731

2832
#include "analyzers.hpp"
@@ -43,7 +47,7 @@ class stemming_token_stream final : public TypedAnalyzer<stemming_token_stream>,
4347
private util::noncopyable {
4448
public:
4549
struct options_t {
46-
icu::Locale locale;
50+
IRESEARCH_ICU_NAMESPACE::Locale locale;
4751

4852
options_t() : locale{"C"} { locale.setToBogus(); }
4953
};

0 commit comments

Comments
 (0)