Skip to content

Commit d9e55be

Browse files
committed
Rewrite cld to actually be async with a promise API
1 parent 407d31c commit d9e55be

File tree

3 files changed

+258
-186
lines changed

3 files changed

+258
-186
lines changed

index.js

Lines changed: 68 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,87 @@
1-
var _ = require('underscore');
2-
var cld2 = require('./build/Release/cld');
1+
const _ = require('underscore');
2+
const cld2 = require('./build/Release/cld');
33

44
module.exports = {
55
LANGUAGES : cld2.LANGUAGES,
66
DETECTED_LANGUAGES : cld2.DETECTED_LANGUAGES,
77
ENCODINGS : cld2.ENCODINGS,
88

9-
detect : function (text, options, cb) {
10-
if (arguments.length < 2) {
11-
return;
12-
}
13-
if (arguments.length < 3) {
9+
async detect(text, options) {
10+
let cb = arguments[2];
11+
if (typeof cb !== 'function' && typeof options === 'function') {
1412
cb = options;
1513
options = {};
1614
}
17-
if (!_.isFunction(cb)) {
18-
return;
19-
}
2015

21-
if (!_.isString(text) || text.length < 1) {
22-
return cb({message:'Empty or invalid text'});
23-
}
16+
try {
17+
if (arguments.length < 1) {
18+
throw new Error('Not enough arguments provided');
19+
}
2420

25-
var defaults = {
26-
isHTML : false,
27-
languageHint : '',
28-
encodingHint : '',
29-
tldHint : '',
30-
httpHint : ''
31-
};
32-
options = _.defaults(options, defaults);
21+
if (!_.isString(text) || text.length < 1) {
22+
throw new Error('Empty or invalid text');
23+
}
3324

34-
if (!_.isBoolean(options.isHTML)) {
35-
return cb({message:'Invalid isHTML value'});
36-
}
37-
if (!_.isString(options.languageHint)) {
38-
return cb({message:'Invalid languageHint'});
39-
}
40-
if (!_.isString(options.encodingHint)) {
41-
return cb({message:'Invalid encodingHint'});
42-
}
43-
if (!_.isString(options.tldHint)) {
44-
return cb({message:'Invalid tldHint'});
45-
}
46-
if (!_.isString(options.httpHint)) {
47-
return cb({message:'Invalid httpHint'});
48-
}
49-
if (options.encodingHint.length > 0 &&
50-
!~cld2.ENCODINGS.indexOf(options.encodingHint)) {
25+
const defaults = {
26+
isHTML : false,
27+
languageHint : '',
28+
encodingHint : '',
29+
tldHint : '',
30+
httpHint : ''
31+
};
32+
options = _.defaults(options, defaults);
5133

52-
return cb({message:'Invalid encodingHint, see ENCODINGS'});
53-
}
54-
if (options.languageHint.length > 0 &&
55-
!~_.keys(cld2.LANGUAGES).indexOf(options.languageHint) &&
56-
!~_.values(cld2.LANGUAGES).indexOf(options.languageHint)) {
34+
if (!_.isBoolean(options.isHTML)) {
35+
throw new Error('Invalid isHTML value');
36+
}
37+
if (!_.isString(options.languageHint)) {
38+
throw new Error('Invalid languageHint');
39+
}
40+
if (!_.isString(options.encodingHint)) {
41+
throw new Error('Invalid encodingHint');
42+
}
43+
if (!_.isString(options.tldHint)) {
44+
throw new Error('Invalid tldHint');
45+
}
46+
if (!_.isString(options.httpHint)) {
47+
throw new Error('Invalid httpHint');
48+
}
49+
if (options.encodingHint.length > 0 &&
50+
!~cld2.ENCODINGS.indexOf(options.encodingHint)) {
5751

58-
return cb({message:'Invalid languageHint, see LANGUAGES'});
59-
}
52+
throw new Error('Invalid encodingHint, see ENCODINGS');
53+
}
54+
if (options.languageHint.length > 0 &&
55+
!~_.keys(cld2.LANGUAGES).indexOf(options.languageHint) &&
56+
!~_.values(cld2.LANGUAGES).indexOf(options.languageHint)) {
6057

61-
var result = cld2.detect(
62-
text,
63-
!options.isHTML,
64-
options.languageHint,
65-
options.encodingHint,
66-
options.tldHint,
67-
options.httpHint
68-
);
58+
throw new Error('Invalid languageHint, see LANGUAGES');
59+
}
6960

70-
if (result.languages.length < 1) {
71-
return cb({message:'Failed to identify language'});
72-
}
61+
const result = await cld2.detectAsync(
62+
text,
63+
!options.isHTML,
64+
options.languageHint,
65+
options.encodingHint,
66+
options.tldHint,
67+
options.httpHint
68+
);
69+
70+
if (result.languages.length < 1) {
71+
throw new Error('Failed to identify language');
72+
}
7373

74-
return cb(null, result);
74+
if (cb) {
75+
return cb(null, result);
76+
} else {
77+
return result;
78+
}
79+
} catch (error) {
80+
if (cb) {
81+
cb(error);
82+
} else {
83+
throw error;
84+
}
85+
}
7586
}
7687
};

src/cld.cc

Lines changed: 114 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
#include <memory>
2+
#include <string>
3+
14
#include "compact_lang_det.h"
25
#include "encodings.h"
36
#include "constants.h"
@@ -9,73 +12,97 @@ using std::unexpected_handler;
912
#include <napi.h>
1013

1114
namespace NodeCld {
12-
Napi::Object Detect(const Napi::CallbackInfo& info) {
13-
auto env = info.Env();
14-
15-
std::string text = info[0].ToString().Utf8Value();
16-
const char *bytes = text.c_str();
17-
int numBytes = text.length();
18-
bool isPlainText = info[1].ToBoolean();
15+
// Owned copy of the arguments of one detection request.
// Filled by UnpackInputFromJSArgs() and read by DetectLanguage(); it holds
// plain C++ values only, no JavaScript handles.
struct CLDInput {
  std::string bytes;         // text to analyse
  std::string languageHint;  // empty when no hint was supplied
  std::string encodingHint;  // empty when no hint was supplied
  std::string tldHint;       // empty when no hint was supplied
  std::string httpHint;      // empty when no hint was supplied
  int numBytes = 0;          // length of `bytes`
  bool isPlainText = false;  // forwarded to CLD2 as its "is plain text" flag
};
24+
25+
struct CLDOutput {
26+
CLD2::Language language3[3];
27+
int percent3[3];
28+
double normalized_score3[3];
29+
CLD2::ResultChunkVector resultChunkVector;
30+
int textBytesFound;
31+
bool isReliable;
32+
};
1933

20-
CLD2::CLDHints hints;
21-
hints.tld_hint = 0;
22-
hints.content_language_hint = 0;
23-
hints.language_hint = CLD2::UNKNOWN_LANGUAGE;
24-
hints.encoding_hint = CLD2::UNKNOWN_ENCODING;
34+
// Copies the JavaScript call arguments into a heap-allocated CLDInput.
//
// Argument layout: info[0] text, info[1] plain-text flag, info[2] language
// hint, info[3] encoding hint, info[4] TLD hint, info[5] HTTP hint.
// A hint is copied only when the argument is a JS string; otherwise the
// corresponding CLDInput field keeps its default (empty) value.
std::unique_ptr<CLDInput> UnpackInputFromJSArgs(const Napi::CallbackInfo &info) {
  std::unique_ptr<CLDInput> unpacked(new CLDInput);

  unpacked->bytes = info[0].ToString().Utf8Value();
  unpacked->numBytes = unpacked->bytes.length();
  unpacked->isPlainText = info[1].ToBoolean();

  // Shared handling for the four optional string hints.
  const auto copyIfString = [&info](size_t index, std::string &target) {
    if (info[index].IsString()) {
      target = info[index].ToString().Utf8Value();
    }
  };

  copyIfString(2, unpacked->languageHint);
  copyIfString(3, unpacked->encodingHint);
  copyIfString(4, unpacked->tldHint);
  copyIfString(5, unpacked->httpHint);

  return unpacked;
}
59+
60+
std::unique_ptr<CLDOutput> DetectLanguage(std::unique_ptr<CLDInput> input) {
61+
std::unique_ptr<CLDOutput> output(new CLDOutput);
62+
CLD2::CLDHints hints;
63+
hints.tld_hint = 0;
64+
hints.content_language_hint = 0;
65+
hints.language_hint = CLD2::UNKNOWN_LANGUAGE;
66+
hints.encoding_hint = CLD2::UNKNOWN_ENCODING;
67+
68+
if (input->languageHint.length() > 0) {
69+
hints.language_hint = Constants::getInstance().getLanguageFromName(input->languageHint.c_str());
70+
}
71+
72+
if (input->encodingHint.length() > 0) {
73+
hints.encoding_hint = Constants::getInstance().getEncodingFromName(input->encodingHint.c_str());
74+
}
75+
76+
if (input->tldHint.length() > 0) {
77+
hints.tld_hint = input->tldHint.c_str();
78+
}
79+
80+
if (input->httpHint.length() > 0) {
81+
hints.content_language_hint = input->httpHint.c_str();
82+
}
6183

6284
CLD2::ExtDetectLanguageSummary(
63-
bytes, numBytes,
64-
isPlainText,
85+
input->bytes.c_str(),
86+
input->numBytes,
87+
input->isPlainText,
6588
&hints,
6689
0,
67-
language3,
68-
percent3,
69-
normalized_score3,
70-
&resultChunkVector,
71-
&textBytesFound,
72-
&isReliable
90+
output->language3,
91+
output->percent3,
92+
output->normalized_score3,
93+
&output->resultChunkVector,
94+
&output->textBytesFound,
95+
&output->isReliable
7396
);
7497

98+
return output;
99+
}
100+
101+
Napi::Object UnpackOutputToJS(const Napi::Env env, std::unique_ptr<CLDOutput> output) {
75102
size_t languageIdx = 0;
76103
auto languages = Napi::Array::New(env);
77104
for (size_t resultIdx = 0; resultIdx < 3; resultIdx++) {
78-
CLD2::Language lang = language3[resultIdx];
105+
CLD2::Language lang = output->language3[resultIdx];
79106

80107
if (lang == CLD2::UNKNOWN_LANGUAGE) {
81108
continue;
@@ -84,16 +111,16 @@ namespace NodeCld {
84111
auto item = Napi::Object::New(env);
85112
item["name"] = Napi::String::New(env, Constants::getInstance().getLanguageName(lang));
86113
item["code"] = Napi::String::New(env, Constants::getInstance().getLanguageCode(lang));
87-
item["percent"] = Napi::Number::New(env, percent3[resultIdx]);
88-
item["score"] = Napi::Number::New(env, normalized_score3[resultIdx]);
114+
item["percent"] = Napi::Number::New(env, output->percent3[resultIdx]);
115+
item["score"] = Napi::Number::New(env, output->normalized_score3[resultIdx]);
89116

90117
languages[languageIdx++] = item;
91118
}
92119

93120
size_t chunkIdx = 0;
94121
auto chunks = Napi::Array::New(env);
95-
for (size_t resultIdx = 0; resultIdx < resultChunkVector.size(); resultIdx++) {
96-
CLD2::ResultChunk chunk = resultChunkVector.at(resultIdx);
122+
for (size_t resultIdx = 0; resultIdx < output->resultChunkVector.size(); resultIdx++) {
123+
CLD2::ResultChunk chunk = output->resultChunkVector.at(resultIdx);
97124
CLD2::Language lang = static_cast<CLD2::Language>(chunk.lang1);
98125

99126
if (lang == CLD2::UNKNOWN_LANGUAGE) {
@@ -110,14 +137,51 @@ namespace NodeCld {
110137
}
111138

112139
auto results = Napi::Object::New(env);
113-
results["reliable"] = Napi::Boolean::New(env, isReliable);
114-
results["textBytes"] = Napi::Number::New(env, textBytesFound);
140+
results["reliable"] = Napi::Boolean::New(env, output->isReliable);
141+
results["textBytes"] = Napi::Number::New(env, output->textBytesFound);
115142
results["languages"] = languages;
116143
results["chunks"] = chunks;
117144

118145
return results;
119146
}
120147

148+
class DetectAsyncWorker : public Napi::AsyncWorker {
149+
public:
150+
DetectAsyncWorker(const Napi::CallbackInfo &info):
151+
Napi::AsyncWorker(info.Env()),
152+
deferred(Napi::Promise::Deferred::New(info.Env())),
153+
mInput(UnpackInputFromJSArgs(info))
154+
{}
155+
156+
void Execute() {
157+
mOutput = DetectLanguage(std::move(mInput));
158+
}
159+
160+
void OnOK() {
161+
deferred.Resolve(UnpackOutputToJS(Env(), std::move(mOutput)));
162+
}
163+
164+
Napi::Promise Promise() {
165+
this->Queue();
166+
return deferred.Promise();
167+
}
168+
169+
private:
170+
Napi::Promise::Deferred deferred;
171+
std::unique_ptr<CLDInput> mInput;
172+
std::unique_ptr<CLDOutput> mOutput;
173+
};
174+
175+
// Synchronous entry point exported as "detect": unpacks the JS arguments,
// runs the detection on the calling thread and returns the result object.
Napi::Object Detect(const Napi::CallbackInfo &info) {
  return UnpackOutputToJS(
    info.Env(),
    DetectLanguage(UnpackInputFromJSArgs(info)));
}
180+
181+
// Asynchronous entry point exported as "detectAsync": starts a
// DetectAsyncWorker for the given arguments and returns its promise.
Napi::Promise DetectAsync(const Napi::CallbackInfo &info) {
  // NOTE(review): no matching delete is visible in this file; node-addon-api
  // documents AsyncWorker instances as destroying themselves once their
  // completion handler has run - confirm before changing the ownership here.
  DetectAsyncWorker *worker = new DetectAsyncWorker(info);
  return worker->Promise();
}
184+
121185
Napi::Object Init(Napi::Env env, Napi::Object exports) {
122186
auto rawDetected = Constants::getInstance().getDetected();
123187
auto numDetected = rawDetected->size();
@@ -146,7 +210,7 @@ namespace NodeCld {
146210
exports["ENCODINGS"] = encodings;
147211

148212
exports["detect"] = Napi::Function::New(env, Detect);
149-
213+
exports["detectAsync"] = Napi::Function::New(env, DetectAsync);
150214
return exports;
151215
}
152216

0 commit comments

Comments
 (0)