Skip to content

Commit e7de5dd

Browse files
author
Andrei Lobov
committed
fix name + tests
1 parent cfcaa24 commit e7de5dd

File tree

2 files changed

+113
-68
lines changed

2 files changed

+113
-68
lines changed

core/analysis/multi_delimited_token_stream.cpp

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ irs::analysis::analyzer::ptr Make(MultiDelimitedAnalyser::Options&& opts) {
409409
return std::make_unique<MultiDelimitedTokenStreamGeneric>(std::move(opts));
410410
}
411411

412-
constexpr std::string_view kDelimiterParamName{"delimiter"};
412+
constexpr std::string_view kDelimiterParamName{"delimiters"};
413413

414414
bool ParseVpackOptions(VPackSlice slice,
415415
MultiDelimitedAnalyser::Options& options) {
@@ -418,46 +418,42 @@ bool ParseVpackOptions(VPackSlice slice,
418418
"Slice for multi_delimited_token_stream is not an object or string");
419419
return false;
420420
}
421+
auto delim_array_slice = slice.get(kDelimiterParamName);
422+
if (!delim_array_slice.isArray()) {
423+
IRS_LOG_WARN(
424+
absl::StrCat("Invalid type or missing '", kDelimiterParamName,
425+
"' (array expected) for multi_delimited_token_stream from "
426+
"VPack arguments"));
427+
return false;
428+
}
421429

422-
if (auto delim_array_slice = slice.get(kDelimiterParamName);
423-
!delim_array_slice.isNone()) {
424-
if (!delim_array_slice.isArray()) {
425-
IRS_LOG_WARN(
426-
absl::StrCat("Invalid type '", kDelimiterParamName,
427-
"' (array expected) for multi_delimited_token_stream from "
428-
"VPack arguments"));
430+
for (auto delim : VPackArrayIterator(delim_array_slice)) {
431+
if (!delim.isString()) {
432+
IRS_LOG_WARN(absl::StrCat(
433+
"Invalid type in '", kDelimiterParamName,
434+
"' (string expected) for multi_delimited_token_stream from "
435+
"VPack arguments"));
429436
return false;
430437
}
438+
auto view = ViewCast<byte_type>(delim.stringView());
431439

432-
for (auto delim : VPackArrayIterator(delim_array_slice)) {
433-
if (!delim.isString()) {
434-
IRS_LOG_WARN(absl::StrCat(
435-
"Invalid type in '", kDelimiterParamName,
436-
"' (string expected) for multi_delimited_token_stream from "
437-
"VPack arguments"));
438-
return false;
439-
}
440-
auto view = ViewCast<byte_type>(delim.stringView());
440+
if (view.empty()) {
441+
IRS_LOG_ERROR("Delimiter list contains an empty string.");
442+
return false;
443+
}
441444

442-
if (view.empty()) {
443-
IRS_LOG_ERROR("Delimiter list contains an empty string.");
445+
for (const auto& known : options.delimiters) {
446+
if (view.starts_with(known) || known.starts_with(view)) {
447+
IRS_LOG_ERROR(
448+
absl::StrCat("Some delimiters are a prefix of others. See `",
449+
ViewCast<char>(bytes_view{known}), "` and `",
450+
delim.stringView(), "`"));
444451
return false;
445452
}
446-
447-
for (const auto& known : options.delimiters) {
448-
if (view.starts_with(known) || known.starts_with(view)) {
449-
IRS_LOG_ERROR(
450-
absl::StrCat("Some delimiters are a prefix of others. See `",
451-
ViewCast<char>(bytes_view{known}), "` and `",
452-
delim.stringView(), "`"));
453-
return false;
454-
}
455-
}
456-
457-
options.delimiters.emplace_back(view);
458453
}
459-
}
460454

455+
options.delimiters.emplace_back(view);
456+
}
461457
return true;
462458
}
463459

tests/analysis/multi_delimited_token_stream_tests.cpp

Lines changed: 85 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
#include "analysis/multi_delimited_token_stream.hpp"
2222
#include "gtest/gtest.h"
2323
#include "tests_config.hpp"
24+
#include "velocypack/Parser.h"
25+
26+
using namespace arangodb::velocypack;
27+
using namespace irs::analysis;
2428

2529
namespace {
2630

@@ -30,6 +34,9 @@ irs::bstring operator""_b(const char* ptr, std::size_t size) {
3034
}
3135

3236
class multi_delimited_token_stream_tests : public ::testing::Test {
37+
public:
38+
static void SetUpTestCase() { MultiDelimitedAnalyser::init(); }
39+
3340
virtual void SetUp() {
3441
// Code here will be called immediately after the constructor (right before
3542
// each test).
@@ -48,15 +55,12 @@ class multi_delimited_token_stream_tests : public ::testing::Test {
4855
// -----------------------------------------------------------------------------
4956

5057
TEST_F(multi_delimited_token_stream_tests, consts) {
51-
static_assert("multi_delimiter" ==
52-
irs::type<irs::analysis::MultiDelimitedAnalyser>::name());
58+
static_assert("multi_delimiter" == irs::type<MultiDelimitedAnalyser>::name());
5359
}
5460

5561
TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
56-
auto stream =
57-
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
58-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
59-
stream->type());
62+
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"a"_b}});
63+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
6064

6165
ASSERT_TRUE(stream->reset("baccaad"));
6266

@@ -83,10 +87,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter) {
8387
}
8488

8589
TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
86-
auto stream =
87-
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
88-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
89-
stream->type());
90+
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"."_b}});
91+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
9092

9193
ASSERT_TRUE(stream->reset(".."));
9294

@@ -97,10 +99,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_empty_match) {
9799
}
98100

99101
TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
100-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
101-
{.delimiters = {";"_b, ","_b, "|"_b}});
102-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
103-
stream->type());
102+
auto stream =
103+
MultiDelimitedAnalyser::Make({.delimiters = {";"_b, ","_b, "|"_b}});
104+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
104105

105106
ASSERT_TRUE(stream->reset("a;b||c|d,ff"));
106107

@@ -133,10 +134,9 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_3) {
133134
}
134135

135136
TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
136-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
137+
auto stream = MultiDelimitedAnalyser::Make(
137138
{.delimiters = {";"_b, ","_b, "|"_b, "."_b, ":"_b}});
138-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
139-
stream->type());
139+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
140140

141141
ASSERT_TRUE(stream->reset("a:b||c.d,ff."));
142142

@@ -169,10 +169,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_5) {
169169
}
170170

171171
TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
172-
auto stream =
173-
irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
174-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
175-
stream->type());
172+
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b}});
173+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
176174

177175
ASSERT_TRUE(stream->reset("foobarfoobazbarfoobar"));
178176

@@ -197,9 +195,8 @@ TEST_F(multi_delimited_token_stream_tests, test_delimiter_single_long) {
197195
}
198196

199197
TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
200-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make({.delimiters = {}});
201-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
202-
stream->type());
198+
auto stream = MultiDelimitedAnalyser::Make({.delimiters = {}});
199+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
203200

204201
ASSERT_TRUE(stream->reset("foobar"));
205202

@@ -216,10 +213,9 @@ TEST_F(multi_delimited_token_stream_tests, no_delimiter) {
216213
}
217214

218215
TEST_F(multi_delimited_token_stream_tests, multi_words) {
219-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
220-
{.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
221-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
222-
stream->type());
216+
auto stream =
217+
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
218+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
223219

224220
ASSERT_TRUE(stream->reset("fooxyzbarbazz"));
225221

@@ -240,10 +236,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words) {
240236
}
241237

242238
TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
243-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
244-
{.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
245-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
246-
stream->type());
239+
auto stream =
240+
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "bar"_b, "baz"_b}});
241+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
247242

248243
ASSERT_TRUE(stream->reset("foobarbaz"));
249244

@@ -254,10 +249,9 @@ TEST_F(multi_delimited_token_stream_tests, multi_words_2) {
254249
}
255250

256251
TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
257-
auto stream = irs::analysis::MultiDelimitedAnalyser::Make(
258-
{.delimiters = {"foo"_b, "ffa"_b}});
259-
ASSERT_EQ(irs::type<irs::analysis::MultiDelimitedAnalyser>::id(),
260-
stream->type());
252+
auto stream =
253+
MultiDelimitedAnalyser::Make({.delimiters = {"foo"_b, "ffa"_b}});
254+
ASSERT_EQ(irs::type<MultiDelimitedAnalyser>::id(), stream->type());
261255

262256
ASSERT_TRUE(stream->reset("abcffoobar"));
263257

@@ -276,3 +270,58 @@ TEST_F(multi_delimited_token_stream_tests, trick_matching_1) {
276270
ASSERT_EQ(offset->end, 10);
277271
ASSERT_FALSE(stream->next());
278272
}
273+
274+
// Exercises construction of the "multi_delimiter" analyzer from VPack-encoded
// arguments: two rejected configurations (wrong key name, wrong value type),
// one accepted configuration that is then used to tokenize, and one
// normalization round-trip whose output is inspected.
TEST_F(multi_delimited_token_stream_tests, construct) {
275+
// wrong name: the key is "delimiter" (singular) instead of "delimiters",
// so no analyzer is expected to be created
276+
{
277+
auto builder = Parser::fromJson(R"({"delimiter":["a", "b"]})");
278+
std::string in_str;
279+
// copy the raw VPack bytes of the parsed slice into a string, which is the
// form in which the arguments are handed to analyzers::get below
in_str.assign(builder->slice().startAs<char>(),
280+
builder->slice().byteSize());
281+
auto stream = analyzers::get(
282+
"multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
283+
ASSERT_EQ(nullptr, stream);
284+
}
285+
286+
// wrong type: "delimiters" is present but holds a number instead of an array,
// so no analyzer is expected to be created
287+
{
288+
auto builder = Parser::fromJson(R"({"delimiters":1})");
289+
std::string in_str;
290+
in_str.assign(builder->slice().startAs<char>(),
291+
builder->slice().byteSize());
292+
auto stream = analyzers::get(
293+
"multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
294+
ASSERT_EQ(nullptr, stream);
295+
}
296+
// valid arguments: an array of two single-character delimiters yields a
// usable analyzer
297+
{
298+
auto builder = Parser::fromJson(R"({"delimiters":["a", "b"]})");
299+
std::string in_str;
300+
in_str.assign(builder->slice().startAs<char>(),
301+
builder->slice().byteSize());
302+
auto stream = analyzers::get(
303+
"multi_delimiter", irs::type<irs::text_format::vpack>::get(), in_str);
304+
ASSERT_NE(nullptr, stream);
305+
// "a" and "b" are both delimiters, so "aib" must produce exactly one
// token, "i"
ASSERT_TRUE(stream->reset("aib"));
306+
ASSERT_TRUE(stream->next());
307+
auto* term = irs::get<irs::term_attribute>(*stream);
308+
ASSERT_EQ("i", irs::ViewCast<char>(term->value));
309+
ASSERT_FALSE(stream->next());
310+
}
// normalization: the normalized output must be an object whose "delimiters"
// entry is an array holding all four input delimiters
311+
{
312+
auto builder = Parser::fromJson(R"({"delimiters":["a", "b", "c", "d"]})");
313+
std::string in_str;
314+
in_str.assign(builder->slice().startAs<char>(),
315+
builder->slice().byteSize());
316+
std::string actual;
317+
// NOTE(review): the value returned by analyzers::normalize is stored in
// `stream` but never asserted - confirm whether a success check is intended
auto stream =
318+
analyzers::normalize(actual, "multi_delimiter",
319+
irs::type<irs::text_format::vpack>::get(), in_str);
320+
321+
// interpret the bytes held in `actual` as a VPack slice for inspection
auto slice = Slice(reinterpret_cast<uint8_t*>(actual.data()));
322+
ASSERT_TRUE(slice.isObject());
323+
auto delimiters = slice.get("delimiters");
324+
ASSERT_TRUE(delimiters.isArray());
325+
ASSERT_EQ(4, delimiters.length());
326+
}
327+
}

0 commit comments

Comments
 (0)