Skip to content

Commit 1605954

Browse files
authored
Wildcard analyzer helpers (iresearch-toolkit#578)
* WIP * WIP
1 parent acdbdcb commit 1605954

File tree

7 files changed

+208
-220
lines changed

7 files changed

+208
-220
lines changed

core/analysis/analyzers.cpp

Lines changed: 85 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,18 @@
2323

2424
#include "analysis/analyzers.hpp"
2525

26+
#include <velocypack/Builder.h>
27+
#include <velocypack/Parser.h>
28+
29+
#include "analysis/token_streams.hpp"
2630
#include "utils/hash_utils.hpp"
2731
#include "utils/register.hpp"
28-
29-
namespace {
32+
#include "utils/vpack_utils.hpp"
3033

3134
using namespace irs;
35+
using namespace arangodb;
36+
37+
namespace {
3238

3339
struct key {
3440
key(std::string_view type, const irs::type_info& args_format)
@@ -70,10 +76,10 @@ struct hash<::key> {
7076
};
7177

7278
} // namespace std
73-
79+
namespace irs::analysis {
7480
namespace {
7581

76-
constexpr std::string_view kFileNamePrefix{"libanalyzer-"};
82+
constexpr std::string_view kFileNamePrefix = "libanalyzer-";
7783

7884
class analyzer_register final
7985
: public irs::tagged_generic_register<::key, ::value, std::string_view,
@@ -93,9 +99,27 @@ class analyzer_register final
9399
}
94100
};
95101

96-
} // namespace
102+
// Attribute of the analyzer definition object that holds the analyzer type
// name (read by GetType, written by NormalizeAnalyzer).
constexpr std::string_view kTypeParam = "type";
103+
// Attribute of the analyzer definition object that holds the type-specific
// properties passed on to the analyzer factory / normalizer.
constexpr std::string_view kPropertiesParam = "properties";
104+
// Attribute of the outer input object under which the analyzer definition
// itself is looked up.
constexpr std::string_view kAnalyzerParam = "analyzer";
97105

98-
namespace irs::analysis {
106+
// Extracts the analyzer type name from an analyzer definition.
//
// On entry `input` must be an object (asserted). On exit `input` has been
// re-pointed at the value stored under the "analyzer" attribute, so callers
// can keep reading "properties" from it.
//
// Returns:
//  - the string token stream type name when the "analyzer" attribute is
//    missing, null or an empty object;
//  - an empty view when the attribute is not an object or its "type"
//    attribute is not a string;
//  - the value of the "type" attribute otherwise.
std::string_view GetType(velocypack::Slice& input) {
  IRS_ASSERT(input.isObject());
  input = input.get(kAnalyzerParam);

  const bool is_absent =
    input.isNone() || input.isNull() || input.isEmptyObject();
  if (is_absent) {
    // no explicit analyzer requested
    return irs::string_token_stream::type_name();
  }

  if (input.isObject()) {
    if (const auto type_slice = input.get(kTypeParam); type_slice.isString()) {
      return type_slice.stringView();
    }
  }

  // malformed definition
  return {};
}
121+
122+
} // namespace
99123

100124
analyzer_registrar::analyzer_registrar(
101125
const type_info& type, const type_info& args_format,
@@ -195,5 +219,60 @@ bool visit(
195219
return analyzer_register::instance().visit(wrapper);
196220
}
197221

222+
// Creates an analyzer from the definition stored under the "analyzer"
// attribute of `input`.
//
// Returns false when the type cannot be determined or no analyzer could be
// created. When the resolved type is the string token stream type, `output`
// is reset to an empty pointer and true is returned. Otherwise the
// "properties" attribute (an empty object if absent) is handed to the
// analyzer factory, first as raw vpack and then, if that yields nothing,
// as JSON text.
bool MakeAnalyzer(velocypack::Slice input, analyzer::ptr& output) {
  const std::string_view type_name = GetType(input);
  if (type_name.empty()) {
    return false;
  }
  if (type_name == irs::string_token_stream::type_name()) {
    output = {};
    return true;
  }

  // GetType left `input` pointing at the analyzer definition object
  velocypack::Slice properties = input.get(kPropertiesParam);
  if (properties.isNone()) {
    properties = velocypack::Slice::emptyObjectSlice();
  }

  output = get(type_name, irs::type<irs::text_format::vpack>::get(),
               {properties.startAs<char>(), properties.byteSize()});
  if (!output) {
    // fallback to json format if vpack isn't available
    output = get(type_name, irs::type<irs::text_format::json>::get(),
                 irs::slice_to_string(properties));
  }
  return output != nullptr;
}
244+
245+
// Writes a normalized copy of the analyzer definition found under the
// "analyzer" attribute of `input` into `output`, again under "analyzer".
//
// Returns false when the type cannot be determined or the properties cannot
// be normalized in either vpack or JSON form. For the string token stream
// type only an empty "analyzer" object is emitted.
//
// NOTE(review): the "analyzer" object scope is opened before normalization
// is attempted, so on a `false` return `output` may already contain a
// partially filled "analyzer" entry -- confirm callers discard it.
// NOTE(review): presumably `output` must already have an open object for the
// keyed ObjectBuilder to attach to -- verify against callers.
bool NormalizeAnalyzer(velocypack::Slice input, velocypack::Builder& output) {
  const std::string_view type_name = GetType(input);
  if (type_name.empty()) {
    return false;
  }

  velocypack::ObjectBuilder analyzer_scope{&output, kAnalyzerParam};
  if (type_name == irs::string_token_stream::type_name()) {
    return true;
  }
  output.add(kTypeParam, velocypack::Value{type_name});

  // GetType left `input` pointing at the analyzer definition object
  velocypack::Slice properties = input.get(kPropertiesParam);
  if (properties.isNone()) {
    properties = velocypack::Slice::emptyObjectSlice();
  }

  std::string buffer;
  if (normalize(buffer, type_name, irs::type<text_format::vpack>::get(),
                {properties.startAs<char>(), properties.byteSize()})) {
    // buffer holds raw vpack bytes produced by the normalizer
    const auto* raw = reinterpret_cast<const uint8_t*>(buffer.data());
    output.add(kPropertiesParam, velocypack::Slice{raw});
    return true;
  }

  // fallback to json format if vpack isn't available
  if (!normalize(buffer, type_name, irs::type<text_format::json>::get(),
                 slice_to_string(properties))) {
    return false;
  }
  auto parsed = velocypack::Parser::fromJson(buffer);
  output.add(kPropertiesParam, parsed->slice());
  return true;
}
276+
198277
} // namespace analyzers
199278
} // namespace irs::analysis

core/analysis/analyzers.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,17 @@
2323

2424
#pragma once
2525

26+
#include <velocypack/Slice.h>
27+
2628
#include <functional>
2729

2830
#include "analyzer.hpp"
2931
#include "shared.hpp"
3032
#include "utils/text_format.hpp"
3133

34+
namespace arangodb::velocypack {
35+
class Builder;
36+
} // namespace arangodb::velocypack
3237
namespace irs::analysis {
3338

3439
using factory_f = analysis::analyzer::ptr (*)(std::string_view args);
@@ -71,6 +76,10 @@ void load_all(std::string_view path);
7176
bool visit(
7277
const std::function<bool(std::string_view, const type_info&)>& visitor);
7378

79+
// Creates an analyzer from the "analyzer" attribute of `input` (which must be
// an object). Returns true on success; `output` is reset to an empty pointer
// when the attribute is missing, null, an empty object, or names the string
// token stream type. Returns false when the definition is malformed or no
// analyzer could be created from its "type"/"properties".
bool MakeAnalyzer(arangodb::velocypack::Slice input, analyzer::ptr& output);
80+
// Normalizes the "analyzer" attribute of `input` (which must be an object)
// and adds the result to `output` under the "analyzer" key. Returns false
// when the definition is malformed or its properties cannot be normalized.
// NOTE(review): on failure `output` may already contain a partial "analyzer"
// entry -- confirm callers discard the builder in that case.
bool NormalizeAnalyzer(arangodb::velocypack::Slice input,
81+
arangodb::velocypack::Builder& output);
82+
7483
} // namespace analyzers
7584
} // namespace irs::analysis
7685

0 commit comments

Comments
 (0)