napi-rs
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packages/jieba/Cargo.toml‎
Lines changed: 0 additions & 1 deletion b/‎packages/jieba/Cargo.toml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎packages/jieba/README.md‎
Lines changed: 42 additions & 45 deletions b/‎packages/jieba/README.md‎
Lines changed: 42 additions & 45 deletions
diff --git a/‎packages/jieba/__tests__/__snapshots__/jieba.spec.ts.md‎
Lines changed: 19 additions & 0 deletions b/‎packages/jieba/__tests__/__snapshots__/jieba.spec.ts.md‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎packages/jieba/__tests__/__snapshots__/jieba.spec.ts.snap‎
57 Bytes b/‎packages/jieba/__tests__/__snapshots__/jieba.spec.ts.snap‎
57 Bytes
diff --git a/‎packages/jieba/__tests__/jieba.spec.ts‎
Lines changed: 14 additions & 10 deletions b/‎packages/jieba/__tests__/jieba.spec.ts‎
Lines changed: 14 additions & 10 deletions
diff --git a/‎packages/jieba/benchmark/jieba.js‎
Lines changed: 52 additions & 31 deletions b/‎packages/jieba/benchmark/jieba.js‎
Lines changed: 52 additions & 31 deletions
diff --git a/‎packages/jieba/benchmark/package.json‎
Lines changed: 3 additions & 0 deletions b/‎packages/jieba/benchmark/package.json‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎packages/jieba/dict.d.ts‎
Lines changed: 2 additions & 0 deletions b/‎packages/jieba/dict.d.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎packages/jieba/dict.js‎
Lines changed: 5 additions & 0 deletions b/‎packages/jieba/dict.js‎
Lines changed: 5 additions & 0 deletions
@@ -12,7 +12,7 @@ resolver = "2"
   getrandom    = "0.2"
   global_alloc = { path = "./crates/alloc" }
   indexmap     = { version = "2", features = ["serde"] }
-  jieba-rs     = { version = "0.6", features = ["default-dict", "tfidf", "textrank"] }
+  jieba-rs     = { version = "0.7", default-features = false, features = ["tfidf", "textrank"] }
   jsonwebtoken = { version = "9" }
   mimalloc     = "0.1"
   napi         = { version = "3.0.0-alpha", default-features = false, features = ["napi3"] }
 
@@ -12,7 +12,6 @@ global_alloc = { workspace = true }
 jieba-rs     = { workspace = true }
 napi         = { workspace = true, default-features = false, features = ["napi3"] }
 napi-derive  = { workspace = true }
-once_cell    = { workspace = true }
 
 [build-dependencies]
 napi-build = { workspace = true }
@@ -14,61 +14,59 @@
 Because [jieba-rs is 33% faster than cppjieba](https://blog.paulme.ng/posts/2019-06-30-optimizing-jieba-rs-to-be-33percents-faster-than-cppjieba.html) and N-API is faster than the `v8` C++ API, `@node-rs/jieba` is faster than `nodejieba`.
 
 ```bash
-@node-rs/jieba x 3,763 ops/sec ±1.18% (92 runs sampled)
-nodejieba x 2,783 ops/sec ±0.67% (91 runs sampled)
-Cut 1184 words bench suite: Fastest is @node-rs/jieba
-
-@node-rs/jieba x 16.10 ops/sec ±1.58% (44 runs sampled)
-nodejieba x 9.81 ops/sec ±2.39% (29 runs sampled)
-Cut 246568 words bench suite: Fastest is @node-rs/jieba
-
-@node-rs/jieba x 1,739 ops/sec ±0.87% (92 runs sampled)
-nodejieba x 931 ops/sec ±1.31% (89 runs sampled)
-Tag 1184 words bench suite: Fastest is @node-rs/jieba
-
-@node-rs/jieba x 6.19 ops/sec ±2.01% (20 runs sampled)
-nodejieba x 3.06 ops/sec ±5.39% (12 runs sampled)
-Tag 246568 words bench suite: Fastest is @node-rs/jieba
+Benchmark Cut 1184 words result
+┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
+│ (index) │ Task Name        │ ops/sec │ Average Time (ns)  │ Margin   │ Samples │
+├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
+│ 0       │ '@node-rs/jieba' │ '8,246' │ 121266.9342871014  │ '±0.17%' │ 4124    │
+│ 1       │ 'nodejieba'      │ '6,392' │ 156439.52799499547 │ '±0.20%' │ 3197    │
+└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
+Benchmark Cut 246568 words result
+┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
+│ (index) │ Task Name        │ ops/sec │ Average Time (ns)  │ Margin   │ Samples │
+├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
+│ 0       │ '@node-rs/jieba' │ '32'    │ 30760703.470588237 │ '±3.01%' │ 17      │
+│ 1       │ 'nodejieba'      │ '19'    │ 51275112.699999996 │ '±2.68%' │ 10      │
+└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
+Benchmark Tag 1184 words result
+┌─────────┬──────────────────┬─────────┬───────────────────┬──────────┬─────────┐
+│ (index) │ Task Name        │ ops/sec │ Average Time (ns) │ Margin   │ Samples │
+├─────────┼──────────────────┼─────────┼───────────────────┼──────────┼─────────┤
+│ 0       │ '@node-rs/jieba' │ '3,174' │ 315048.8916876547 │ '±0.20%' │ 1588    │
+│ 1       │ 'nodejieba'      │ '2,672' │ 374213.8870605615 │ '±0.23%' │ 1337    │
+└─────────┴──────────────────┴─────────┴───────────────────┴──────────┴─────────┘
+Benchmark Tag 246568 words result
+┌─────────┬──────────────────┬─────────┬────────────────────┬──────────┬─────────┐
+│ (index) │ Task Name        │ ops/sec │ Average Time (ns)  │ Margin   │ Samples │
+├─────────┼──────────────────┼─────────┼────────────────────┼──────────┼─────────┤
+│ 0       │ '@node-rs/jieba' │ '11'    │ 84886341.7999999   │ '±5.74%' │ 10      │
+│ 1       │ 'nodejieba'      │ '7'     │ 125781083.30000004 │ '±4.75%' │ 10      │
+└─────────┴──────────────────┴─────────┴────────────────────┴──────────┴─────────┘
 ```
 
-## Support matrix
-
-|                  | node12 | node14 | node16 | node18 |
-| ---------------- | ------ | ------ | ------ | ------ |
-| Windows x64      | ✓      | ✓      | ✓      | ✓      |
-| Windows x32      | ✓      | ✓      | ✓      | ✓      |
-| Windows arm64    | ✓      | ✓      | ✓      | ✓      |
-| macOS x64        | ✓      | ✓      | ✓      | ✓      |
-| macOS arm64      | ✓      | ✓      | ✓      | ✓      |
-| Linux x64 gnu    | ✓      | ✓      | ✓      | ✓      |
-| Linux x64 musl   | ✓      | ✓      | ✓      | ✓      |
-| Linux arm gnu    | ✓      | ✓      | ✓      | ✓      |
-| Linux arm64 gnu  | ✓      | ✓      | ✓      | ✓      |
-| Linux arm64 musl | ✓      | ✓      | ✓      | ✓      |
-| Android arm64    | ✓      | ✓      | ✓      | ✓      |
-| Android armv7    | ✓      | ✓      | ✓      | ✓      |
-| FreeBSD x64      | ✓      | ✓      | ✓      | ✓      |
-
 ## Usage
 
 ```javascript
-const { load, cut } = require('@node-rs/jieba')
+import { Jieba } from '@node-rs/jieba'
+import { dict } from '@node-rs/jieba/dict'
 
-load()
-// loadDict(fs.readFileSync(...))
-// loadTFIDFDict(fs.readFileSync(...))
+// load jieba with the default dict
+const jieba = Jieba.withDict(dict)
 
-cut('我们中出了一个叛徒', false)
+console.info(jieba.cut('我们中出了一个叛徒', false))
 
 // ["我们", "中", "出", "了", "一个", "叛徒"]
 ```
 
 ```javascript
-const { load, cut } = require('@node-rs/jieba')
+import { Jieba, TfIdf } from '@node-rs/jieba'
+import { dict, idf } from '@node-rs/jieba/dict'
 
-load()
+const jieba = Jieba.withDict(dict)
+const tfIdf = TfIdf.withDict(idf)
 
-extract(
+tfIdf.extractKeywords(
+  jieba,
   '今天纽约的天气真好啊，京华大酒店的张尧经理吃了一只北京烤鸭。后天纽约的天气不好，昨天纽约的天气也不好，北京烤鸭真好吃',
   3,
 )
@@ -83,15 +81,14 @@ extract(
 ### Load custom dictionaries
 
 ```javascript
-const { loadDict, cut } = require('@node-rs/jieba')
+import { Jieba } from '@node-rs/jieba'
 const customDict = ['哪行 50', '干一行 51', '行一行 52', '行行 53']
 
 const dictBuffer = Buffer.from(customDict.join('\n'), 'utf-8')
-// loadDict doc: https://github.com/fxsjy/jieba?tab=readme-ov-file#%E8%BD%BD%E5%85%A5%E8%AF%8D%E5%85%B8
-loadDict(dictBuffer)
+const jieba = Jieba.withDict(dictBuffer)
 
 const text = '人要是行干一行行一行，一行行行行行，行行行干哪行都行'
-const output = cut(text, false)
+const output = jieba.cut(text, false)
 console.log('分词结果⤵️\n', output.join('/'))
 // Before: 人/要是/行/干/一行行/一行/，/一行行/行/行/行/，/行/行/行/干/哪/行/都行
 // After:  人/要是/行/干一行/行一行/，/一行行/行行/行/，/行行/行/干/哪行/都行
 
@@ -141,3 +141,22 @@ Generated by [AVA](https://avajs.dev).
         weight: 'number',
       },
     ]
+
+## should be able to load custom TFID dict
+
+> Snapshot 1
+
+    [
+      {
+        keyword: 'CEO',
+        weight: 1.6825,
+      },
+      {
+        keyword: '不用',
+        weight: 1.6825,
+      },
+      {
+        keyword: '专业',
+        weight: 1.6825,
+      },
+    ]
@@ -1,39 +1,43 @@
 import test from 'ava'
 
-import { cut, tag, extract, loadTFIDFDict, loadDict } from '../index'
+import { Jieba, TfIdf } from '../index.js'
+import { dict, idf } from '../dict.js'
 
 const sentence = '我是拖拉机学院手扶拖拉机专业的。不用多久，我就会升职加薪，走上人生巅峰。'
 
+const jieba = Jieba.withDict(dict)
+const tfIdf = TfIdf.withDict(idf)
+
 test('cut result should be equal to nodejieba', (t) => {
-  t.snapshot(cut(sentence))
+  t.snapshot(jieba.cut(sentence))
 })
 
 test('tag result shoule be equal to nodejieba', (t) => {
-  t.snapshot(tag(sentence))
+  t.snapshot(jieba.tag(sentence))
 })
 
 test('extract should be equal to nodejieba', (t) => {
   const sentence =
     '今天纽约的天气真好啊，京华大酒店的张尧经理吃了一只北京烤鸭。后天纽约的天气不好，昨天纽约的天气也不好，北京烤鸭真好吃'
   const topn = 3
   t.snapshot(
-    extract(sentence, topn).map((t) => ({
+    tfIdf.extractKeywords(jieba, sentence, topn).map((t) => ({
       keyword: t.keyword,
       weight: typeof t.weight,
     })),
   )
 })
 
-test.skip('should be able to load custom TFID dict', (t) => {
+test('should be able to load custom TFID dict', (t) => {
   const userdict = Buffer.from('专业 20.19')
-  loadTFIDFDict(userdict)
+  const tfIdf = TfIdf.withDict(userdict)
   const fixture = '我是拖拉机学院手扶拖拉机专业的。不用多久，我就会升职加薪，当上CEO，走上人生巅峰。'
-  t.snapshot(extract(fixture, 3))
+  t.snapshot(tfIdf.extractKeywords(jieba, fixture, 3))
 })
 
-test.skip('should be able to load custom dict', (t) => {
+test('should be able to load custom dict', (t) => {
   const userdict = Buffer.from('出了 10000')
-  loadDict(userdict)
+  const jieba = Jieba.withDict(userdict)
   const fixture = '我们中出了一个叛徒'
-  t.notThrows(() => cut(fixture))
+  t.notThrows(() => jieba.cut(fixture))
 })
@@ -1,16 +1,19 @@
-const fs = require('fs')
-const { join } = require('path')
+import { readFileSync } from 'node:fs'
+import { join } from 'node:path'
+import { fileURLToPath } from 'node:url'
 
-const { Suite } = require('benchmark')
-const chalk = require('chalk')
-const nodejieba = require('nodejieba')
+import { Bench } from 'tinybench'
+import chalk from 'chalk'
+import nodejieba from 'nodejieba'
 
-const { load, cut, tag } = require('../index')
+import { Jieba, TfIdf } from '../index.js'
+import { dict, idf } from '../dict.js'
 
-load()
-nodejieba.load()
+const { load, cut, tag } = nodejieba
+
+const __dirname = join(fileURLToPath(import.meta.url), '..')
 
-const fixture = fs.readFileSync(join(__dirname, 'weicheng.txt'), 'utf8')
+const fixture = readFileSync(join(__dirname, 'weicheng.txt'), 'utf8')
 
 const preface = `
 重印前记《围城》一九四七年在上海初版，一九四八年再版，一九四九年三版，以后国内没有重印过。偶然碰见它的新版，那都是香港的“盗印”本。没有看到台湾的“盗印”，据说在那里它是禁书。美国哥伦比亚大学夏志清教授的英文著作里对它作了过高的评价，导致了一些西方语言的译本。日本京都大学荒井健教授很久以前就通知我他要翻译，近年来也陆续在刊物上发表了译文。现在，人民文学出版社建议重新排印，以便原著在国内较易找着，我感到意外和忻辛。
@@ -23,30 +26,48 @@ const preface = `
 
 const prefaceLength = preface.length
 
-function createBench(suitename, transform, napi, jieba, input) {
-  const cutSuite = new Suite(suitename)
-  console.assert(transform(napi(input)) === transform(jieba(input)))
-
-  cutSuite
-    .add('@node-rs/jieba', () => {
-      napi(input)
-    })
-    .add('nodejieba', () => {
-      jieba(input)
-    })
-    .on('cycle', function (event) {
-      console.info(String(event.target))
-    })
-    .on('complete', function () {
-      console.info(`${this.name} bench suite: Fastest is ${chalk.green(this.filter('fastest').map('name'))}`)
-    })
-    .run()
+async function createBench(suitename, transform, napi, jieba) {
+  const suite = new Bench()
+  console.assert(transform(napi()) === transform(jieba()))
+
+  suite.add('@node-rs/jieba', napi).add('nodejieba', jieba)
+
+  await suite.warmup()
+
+  await suite.run()
+
+  console.info(chalk.green(`Benchmark ${suitename} result`))
+  console.table(suite.table())
 }
 
-createBench(`Cut ${prefaceLength} words`, (output) => output.join(''), cut, nodejieba.cut, preface)
+load()
+const jieba = Jieba.withDict(dict)
+const tfIdf = TfIdf.withDict(idf)
+
+await createBench(
+  `Cut ${prefaceLength} words`,
+  (output) => output.join(''),
+  () => jieba.cut(preface),
+  () => cut(preface),
+)
 
-createBench(`Cut ${fixture.toString().length} words`, (output) => output.join(''), cut, nodejieba.cut, fixture)
+await createBench(
+  `Cut ${fixture.toString().length} words`,
+  (output) => output.join(''),
+  () => jieba.cut(fixture),
+  () => cut(fixture),
+)
 
-createBench(`Tag ${prefaceLength} words`, (output) => typeof output, tag, nodejieba.tag, preface)
+await createBench(
+  `Tag ${prefaceLength} words`,
+  (output) => typeof output,
+  () => jieba.tag(preface),
+  () => tag(preface),
+)
 
-createBench(`Tag ${fixture.toString().length} words`, (output) => typeof output, tag, nodejieba.tag, fixture)
+await createBench(
+  `Tag ${fixture.toString().length} words`,
+  (output) => typeof output,
+  () => jieba.tag(fixture),
+  () => tag(fixture),
+)
@@ -0,0 +1,3 @@
+{
+  "type": "module"
+}
@@ -0,0 +1,2 @@
+export const dict: Uint8Array
+export const idf: Uint8Array
@@ -0,0 +1,5 @@
+const fs = require('fs')
+const { join } = require('path')
+
+module.exports.dict = fs.readFileSync(join(__dirname, 'dict.txt'))
+module.exports.idf = fs.readFileSync(join(__dirname, 'idf.txt'))
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+export const dict: Uint8Array`
	`2`	`+export const idf: Uint8Array`