Skip to content

Commit 78a335d

Browse files
Adding options of using myanmartools library in detector, add test then
build
1 parent 84ec0d8 commit 78a335d

File tree

10 files changed

+433
-150
lines changed

10 files changed

+433
-150
lines changed

README.md

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Knayi Myanmar Script
1515

1616
## Features
1717
- Detector (Unicode and Zawgyi)
18+
Detection can now optionally use the `googlei18n/myanmartools` library (see below).
1819
- Converter (Unicode and Zawgyi)
1920
- SyllBreak (Unicode and Zawgyi)
2021
- Spelling Check (Unicode and Zawgyi)
@@ -32,13 +33,13 @@ yarn add knayi-myscript
3233

3334
Using CDN
3435
```html
35-
<script src="https://unpkg.com/knayi-myscript@2.2.0/dist/knayi-myscript.min.js"></script>
36+
<script src="https://unpkg.com/knayi-myscript@latest/dist/knayi-myscript.min.js"></script>
3637
```
3738

3839
## API
3940
|Method Name | Arguments | Return | Note |
4041
| --- | --- | --- | --- |
41-
| `fontDetect` | `content: String(require)` | `String` | Font Detector, it will detect unicode/zawgyi of the **content** Text. If nothing is matched or possibility are equal, it will return as 'zawgyi' or specified font type in **defaultFont** params. |
42+
| `fontDetect` | `content: String(require)`, <br>`fallbackFontType: fontName(optional)`, <br>`options: Object(optional)` | `String` | Font Detector; it detects whether the **content** text is unicode or zawgyi. If nothing is matched or the possibilities are equal, it returns 'zawgyi' or the font type specified in the **fallbackFontType** param. |
4243
| `fontConvert` | `content: String(require)`,<br>`convertTo: fontName(require)`,<br>`convertFrom: fontName(optional)`| `String` | Converts the text to the target font type. This method needs a spelling fix, so it uses **spellingFix** by default. **convertFrom** will be detected by **fontDetect** when you don't specify it.<hr> `fontName` must be one of `unicode` or `zawgyi`. |
4344
| `syllBreak` | `content: String(require)`,<br>`fontType: fontName(optional)`,<br>`breakPoint: String(optional)` | `String` | Makes systematic word breaks in Myanmar text. **fontType** will be detected by **fontDetect** when you don't specify it.<hr> `fontName` must be one of `unicode` or `zawgyi`. |
4445
| `spellingFix` | `content: String(require)`,<br>`fontType: fontName(optional)` | `String` | **fontType** will be detected by **fontDetect** when you don't specify it. It fixes spelling in Myanmar text.<hr> `fontName` must be one of `unicode` or `zawgyi`. |
@@ -55,7 +56,7 @@ import knayi from 'knayi-myscript'
5556

5657
## Example
5758

58-
- **fontDetect(content, [defaultFont])**
59+
- **fontDetect(content, [fallbackFontType], [options])**
5960
```javascript
6061
knayi.fontDetect('မဂၤလာပါ') // zawgyi
6162
knayi.fontDetect('မင်္ဂလာပါ') // unicode
@@ -77,6 +78,36 @@ knayi.syllBreak('မင်္ဂလာပါ', null, '$$') // 'မင်္ဂ
7778
knayi.spellingFix('မင်္ဂလာာပါါ') // 'မင်္ဂလာပါ'
7879
```
7980

81+
## Using googlei18n/myanmartools in detector.js
82+
83+
You can now use the `googlei18n/myanmartools` library in the detector.
84+
By default, the `use_myanmartools` option is set to `false`.
85+
86+
```javascript
87+
// Add options for single process
88+
knayi.fontDetect('မဂၤလာပါ', null, {use_myanmartools: true}) // this will use myanmartools
89+
knayi.fontDetect('မင်္ဂလာပါ') // this will use default
90+
91+
// OR set for whole project
92+
knayi.setGlobalOptions({
93+
detector: {
94+
use_myanmartools: true
95+
}
96+
})
97+
```
98+
99+
You can also set the probability thresholds for zawgyi prediction with
100+
`myanmartools_zg_threshold` as `[lower, higher]`. This means that if the probability
101+
predicted by myanmartools is < 0.05, detector.js assumes **unicode**; if it is > 0.95,
102+
it assumes **zawgyi**. Anything in between returns the fallback font type.
103+
104+
```javascript
105+
knayi.fontDetect('မင်္ဂလာပါ', null, {
106+
use_myanmartools: true,
107+
myanmartools_zg_threshold: [0.05, 0.95]
108+
})
109+
```
110+
80111
## Build
81112

82113
- Required node >=6

dist/knayi-myscript.es.js

Lines changed: 95 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -71,22 +71,29 @@ module.exports =
7171
"use strict";
7272

7373

74-
var myanmartools = __webpack_require__(3);
75-
var whitespace = '[\\x20\\t\\r\\n\\f]';
76-
var mmCharacterRange = /[\u1000-\u109F]/;
7774
var library = {};
75+
var mmCharacterRange = /[\u1000-\u109F]/;
76+
var whitespace = '[\\x20\\t\\r\\n\\f]';
77+
78+
var myanmartools = __webpack_require__(3);
79+
var myanmartoolZawgyiDetector = new myanmartools.ZawgyiDetector();
80+
81+
var GLOBAL_OPTIONS = {
82+
use_myanmartools: false,
83+
myanmartools_zg_threshold: [0.05, 0.95]
84+
};
7885

7986
/** DETECTION Library **/
8087
library.detect = {
81-
unicode: ['\u103E', '\u103F', '\u100A\u103A', '\u1014\u103A', '\u1004\u103A', '\u1031\u1038', '\u1031\u102C', '\u103A\u1038', '\u1035', '[\u1050-\u1059]', '^([\u1000-\u1021]\u103C|[\u1000-\u1021]\u1031)'],
82-
zawgyi: ['\u102C\u1039', '\u103A\u102C', whitespace + '(\u103B|\u1031|[\u107E-\u1084])[\u1000-\u1021]', '^(\u103B|\u1031|[\u107E-\u1084])[\u1000-\u1021]', '[\u1000-\u1021]\u1039[^\u1000-\u1021]', '\u1025\u1039', '\u1039\u1038', '[\u102B-\u1030\u1031\u103A\u1038](\u103B|[\u107E-\u1084])[\u1000-\u1021]', '\u1036\u102F', '[\u1000-\u1021]\u1039\u1031', '\u1064', '\u1039' + whitespace, '\u102C\u1031', '[\u102B-\u1030\u103A\u1038]\u1031[\u1000-\u1021]', '\u1031\u1031', '\u102F\u102D', '\u1039$']
88+
unicode: ['\u103E', '\u103F', '\u100A\u103A', '\u1014\u103A', '\u1004\u103A', '\u1031\u1038', '\u1031\u102C', '\u103A\u1038', '\u1035', '[\u1050-\u1059]', '^([\u1000-\u1021]\u103C|[\u1000-\u1021]\u1031)'],
89+
zawgyi: ['\u102C\u1039', '\u103A\u102C', whitespace + '(\u103B|\u1031|[\u107E-\u1084])[\u1000-\u1021]', '^(\u103B|\u1031|[\u107E-\u1084])[\u1000-\u1021]', '[\u1000-\u1021]\u1039[^\u1000-\u1021]', '\u1025\u1039', '\u1039\u1038', '[\u102B-\u1030\u1031\u103A\u1038](\u103B|[\u107E-\u1084])[\u1000-\u1021]', '\u1036\u102F', '[\u1000-\u1021]\u1039\u1031', '\u1064', '\u1039' + whitespace, '\u102C\u1031', '[\u102B-\u1030\u103A\u1038]\u1031[\u1000-\u1021]', '\u1031\u1031', '\u102F\u102D', '\u1039$']
8390
};
8491

8592
// Populate Detect library as RegExps
8693
Object.keys(library.detect).forEach(function (type) {
87-
for (var i = 0; i < library.detect[type].length; i++) {
88-
library.detect[type][i] = new RegExp(library.detect[type][i], 'g');
89-
}
94+
for (var i = 0; i < library.detect[type].length; i++) {
95+
library.detect[type][i] = new RegExp(library.detect[type][i], 'g');
96+
}
9097
});
9198

9299
/**
@@ -95,38 +102,79 @@ Object.keys(library.detect).forEach(function (type) {
95102
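* @param fallback_font_type Font type returned when detection is inconclusive (defaults to 'zawgyi');
96103
* @return 'unicode' or 'zawgyi', or the fallback font type when detection is inconclusive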
97104
*/
98-
function fontDetect(content, def) {
99-
if (!content) throw new Error('Content must be specified on knayi.fontDetect');
105+
function fontDetect(content, fallback_font_type) {
106+
var options = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
100107

101-
if (content === '') return content;
108+
if (!content) throw new Error('Content must be specified on knayi.fontDetect');
102109

103-
if (!mmCharacterRange.test(content)) return def;
110+
if (content === '') return content;
104111

105-
content = content.trim().replace(/\u200B/g, '');
106-
def = def || 'zawgyi';
112+
if (!mmCharacterRange.test(content)) return fallback_font_type || 'en';
107113

108-
var detector = new myanmartools.ZawgyiDetector();
109-
var zawgyiPropability = detector.getZawgyiProbability(content);
114+
content = content.trim().replace(/\u200B/g, '');
115+
fallback_font_type = fallback_font_type || 'zawgyi';
110116

111-
var match = {};
117+
options = verifyOptions(options);
112118

113-
for (var type in library.detect) {
114-
match[type] = 0;
119+
if (options.use_myanmartools) {
115120

116-
for (var i = 0; i < library.detect[type].length; i++) {
117-
var rule = library.detect[type][i];
118-
var m = content.match(rule);
119-
match[type] += m && m.length || 0;
120-
}
121-
}
121+
var myanmartools_zg_probability = myanmartoolZawgyiDetector.getZawgyiProbability(content);
122122

123-
if (match.unicode > match.zawgyi && zawgyiPropability < 0.05) {
124-
return 'unicode';
125-
} else if (match.unicode < match.zawgyi && zawgyiPropability > 0.95) {
126-
return 'zawgyi';
127-
} else {
128-
return def;
129-
}
123+
if (myanmartools_zg_probability < options.myanmartools_zg_threshold[0]) {
124+
return 'unicode';
125+
} else if (myanmartools_zg_probability > options.myanmartools_zg_threshold[1]) {
126+
return 'zawgyi';
127+
} else {
128+
return fallback_font_type;
129+
}
130+
} else {
131+
132+
var match = {};
133+
134+
for (var type in library.detect) {
135+
match[type] = 0;
136+
137+
for (var i = 0; i < library.detect[type].length; i++) {
138+
var rule = library.detect[type][i];
139+
var m = content.match(rule);
140+
match[type] += m && m.length || 0;
141+
}
142+
}
143+
144+
if (match.unicode > match.zawgyi) {
145+
return 'unicode';
146+
} else if (match.unicode < match.zawgyi) {
147+
return 'zawgyi';
148+
} else {
149+
return fallback_font_type;
150+
}
151+
}
152+
};
153+
154+
/**
155+
* Verify the configuration for using googlei18n/myanmar-tools, filling in defaults for missing options
156+
*/
157+
function verifyOptions() {
158+
var _ref = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {},
159+
_ref$use_myanmartools = _ref.use_myanmartools,
160+
use_myanmartools = _ref$use_myanmartools === undefined ? GLOBAL_OPTIONS.use_myanmartools : _ref$use_myanmartools,
161+
_ref$myanmartools_zg_ = _ref.myanmartools_zg_threshold,
162+
myanmartools_zg_threshold = _ref$myanmartools_zg_ === undefined ? [0.05, 0.95] : _ref$myanmartools_zg_;
163+
164+
// Check types
165+
if (typeof myanmartools_zg_threshold[0] !== 'number' || typeof myanmartools_zg_threshold[1] !== 'number') {
166+
console.error('myanmartools_zg_threshold must be [number, number]');
167+
myanmartools_zg_threshold = GLOBAL_OPTIONS.myanmartools_zg_threshold;
168+
}
169+
170+
return {
171+
use_myanmartools: use_myanmartools,
172+
myanmartools_zg_threshold: myanmartools_zg_threshold
173+
};
174+
}
175+
176+
fontDetect.__setOptions = function (options) {
177+
GLOBAL_OPTIONS = verifyOptions(options);
130178
};
131179

132180
module.exports = fontDetect;
@@ -202,7 +250,14 @@ var fontConvert = __webpack_require__(10);
202250
var syllBreak = __webpack_require__(11);
203251
var spellingFix = __webpack_require__(1);
204252

253+
var setGlobalOptions = function setGlobalOptions() {
254+
var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};
255+
256+
fontDetect.__setOptions(options.detector);
257+
};
258+
205259
module.exports = {
260+
setGlobalOptions: setGlobalOptions,
206261
fontDetect: fontDetect,
207262
fontConvert: fontConvert,
208263
syllBreak: syllBreak,
@@ -2312,6 +2367,8 @@ for (var i = 0, len = code.length; i < len; ++i) {
23122367
revLookup[code.charCodeAt(i)] = i
23132368
}
23142369

2370+
// Support decoding URL-safe base64 strings, as Node.js does.
2371+
// See: https://en.wikipedia.org/wiki/Base64#URL_applications
23152372
revLookup['-'.charCodeAt(0)] = 62
23162373
revLookup['_'.charCodeAt(0)] = 63
23172374

@@ -2373,7 +2430,7 @@ function encodeChunk (uint8, start, end) {
23732430
var tmp
23742431
var output = []
23752432
for (var i = start; i < end; i += 3) {
2376-
tmp = (uint8[i] << 16) + (uint8[i + 1] << 8) + (uint8[i + 2])
2433+
tmp = ((uint8[i] << 16) & 0xFF0000) + ((uint8[i + 1] << 8) & 0xFF00) + (uint8[i + 2] & 0xFF)
23772434
output.push(tripletToBase64(tmp))
23782435
}
23792436
return output.join('')
@@ -2418,7 +2475,7 @@ function fromByteArray (uint8) {
24182475

24192476
exports.read = function (buffer, offset, isLE, mLen, nBytes) {
24202477
var e, m
2421-
var eLen = nBytes * 8 - mLen - 1
2478+
var eLen = (nBytes * 8) - mLen - 1
24222479
var eMax = (1 << eLen) - 1
24232480
var eBias = eMax >> 1
24242481
var nBits = -7
@@ -2431,12 +2488,12 @@ exports.read = function (buffer, offset, isLE, mLen, nBytes) {
24312488
e = s & ((1 << (-nBits)) - 1)
24322489
s >>= (-nBits)
24332490
nBits += eLen
2434-
for (; nBits > 0; e = e * 256 + buffer[offset + i], i += d, nBits -= 8) {}
2491+
for (; nBits > 0; e = (e * 256) + buffer[offset + i], i += d, nBits -= 8) {}
24352492

24362493
m = e & ((1 << (-nBits)) - 1)
24372494
e >>= (-nBits)
24382495
nBits += mLen
2439-
for (; nBits > 0; m = m * 256 + buffer[offset + i], i += d, nBits -= 8) {}
2496+
for (; nBits > 0; m = (m * 256) + buffer[offset + i], i += d, nBits -= 8) {}
24402497

24412498
if (e === 0) {
24422499
e = 1 - eBias
@@ -2451,7 +2508,7 @@ exports.read = function (buffer, offset, isLE, mLen, nBytes) {
24512508

24522509
exports.write = function (buffer, value, offset, isLE, mLen, nBytes) {
24532510
var e, m, c
2454-
var eLen = nBytes * 8 - mLen - 1
2511+
var eLen = (nBytes * 8) - mLen - 1
24552512
var eMax = (1 << eLen) - 1
24562513
var eBias = eMax >> 1
24572514
var rt = (mLen === 23 ? Math.pow(2, -24) - Math.pow(2, -77) : 0)
@@ -2484,7 +2541,7 @@ exports.write = function (buffer, value, offset, isLE, mLen, nBytes) {
24842541
m = 0
24852542
e = eMax
24862543
} else if (e + eBias >= 1) {
2487-
m = (value * c - 1) * Math.pow(2, mLen)
2544+
m = ((value * c) - 1) * Math.pow(2, mLen)
24882545
e = e + eBias
24892546
} else {
24902547
m = value * Math.pow(2, eBias - 1) * Math.pow(2, mLen)

0 commit comments

Comments
 (0)