Skip to content

Commit 8eab8e7

Browse files
committed
add device option (cpu / webgpu)
1 parent 648f24b commit 8eab8e7

File tree

11 files changed

+300
-238
lines changed

11 files changed

+300
-238
lines changed

chunkit.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export async function chunkit(
4040
combineChunksSimilarityThreshold = DEFAULT_CONFIG.COMBINE_CHUNKS_SIMILARITY_THRESHOLD,
4141
onnxEmbeddingModel = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL,
4242
dtype = DEFAULT_CONFIG.DTYPE,
43+
device = DEFAULT_CONFIG.DEVICE,
4344
localModelPath = DEFAULT_CONFIG.LOCAL_MODEL_PATH,
4445
modelCacheDir = DEFAULT_CONFIG.MODEL_CACHE_DIR,
4546
returnEmbedding = DEFAULT_CONFIG.RETURN_EMBEDDING,
@@ -59,6 +60,7 @@ export async function chunkit(
5960
const { modelName, dtype: usedDtype } = await initializeEmbeddingUtils(
6061
onnxEmbeddingModel,
6162
dtype,
63+
device,
6264
localModelPath,
6365
modelCacheDir
6466
);
@@ -187,6 +189,7 @@ export async function cramit(
187189
maxTokenSize = DEFAULT_CONFIG.MAX_TOKEN_SIZE,
188190
onnxEmbeddingModel = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL,
189191
dtype = DEFAULT_CONFIG.DTYPE,
192+
device = DEFAULT_CONFIG.DEVICE,
190193
localModelPath = DEFAULT_CONFIG.LOCAL_MODEL_PATH,
191194
modelCacheDir = DEFAULT_CONFIG.MODEL_CACHE_DIR,
192195
returnEmbedding = DEFAULT_CONFIG.RETURN_EMBEDDING,
@@ -206,6 +209,7 @@ export async function cramit(
206209
await initializeEmbeddingUtils(
207210
onnxEmbeddingModel,
208211
dtype,
212+
device,
209213
localModelPath,
210214
modelCacheDir
211215
);
@@ -293,6 +297,7 @@ export async function sentenceit(
293297
logging = DEFAULT_CONFIG.LOGGING,
294298
onnxEmbeddingModel = DEFAULT_CONFIG.ONNX_EMBEDDING_MODEL,
295299
dtype = DEFAULT_CONFIG.DTYPE,
300+
device = DEFAULT_CONFIG.DEVICE,
296301
localModelPath = DEFAULT_CONFIG.LOCAL_MODEL_PATH,
297302
modelCacheDir = DEFAULT_CONFIG.MODEL_CACHE_DIR,
298303
returnEmbedding = DEFAULT_CONFIG.RETURN_EMBEDDING,
@@ -313,6 +318,7 @@ export async function sentenceit(
313318
await initializeEmbeddingUtils(
314319
onnxEmbeddingModel,
315320
dtype,
321+
device,
316322
localModelPath,
317323
modelCacheDir
318324
);

config.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ export const DEFAULT_CONFIG = {
99
COMBINE_CHUNKS_SIMILARITY_THRESHOLD: 0.5,
1010
ONNX_EMBEDDING_MODEL: "Xenova/all-MiniLM-L6-v2",
1111
DTYPE: 'q8',
12+
DEVICE: 'cpu', // supported values: 'cpu' or 'webgpu'
1213
LOCAL_MODEL_PATH: "./models",
1314
MODEL_CACHE_DIR: "./models",
1415
RETURN_EMBEDDING: false,

embeddingUtils.js

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const embeddingCache = new LRUCache({
1818
export async function initializeEmbeddingUtils(
1919
onnxEmbeddingModel,
2020
dtype = 'fp32',
21+
device = 'cpu',
2122
localModelPath = null,
2223
modelCacheDir = null
2324
) {
@@ -27,15 +28,22 @@ export async function initializeEmbeddingUtils(
2728
if (modelCacheDir) env.cacheDir = modelCacheDir;
2829

2930
tokenizer = await AutoTokenizer.from_pretrained(onnxEmbeddingModel);
30-
generateEmbedding = await pipeline('feature-extraction', onnxEmbeddingModel, {
31+
const pipelineOptions = {
3132
dtype: dtype,
32-
});
33+
};
34+
35+
if (device !== 'webgpu') {
36+
pipelineOptions.device = device;
37+
}
38+
39+
generateEmbedding = await pipeline('feature-extraction', onnxEmbeddingModel, pipelineOptions);
3340

3441
embeddingCache.clear();
3542

3643
return {
3744
modelName: onnxEmbeddingModel,
38-
dtype: dtype
45+
dtype: dtype,
46+
device: device,
3947
};
4048
}
4149

example/example-chunkit.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ for (const textFile of textFiles) {
3131
});
3232
}
3333

34+
// Get device from command line arguments, default to 'cpu'
35+
const device = process.argv[2] || 'cpu';
36+
3437
// start timing
3538
const startTime = performance.now();
3639

@@ -47,6 +50,7 @@ let myTestChunks = await chunkit(
4750
combineChunksSimilarityThreshold: 0.700,
4851
onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2",
4952
dtype: "q8",
53+
device: device, // Pass the device to chunkit
5054
localModelPath: "../models",
5155
modelCacheDir: "../models",
5256
returnTokenLength: true,
@@ -61,8 +65,8 @@ const endTime = performance.now();
6165
let trackedTimeSeconds = (endTime - startTime) / 1000;
6266
trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
6367

64-
console.log("\n\n");
6568
console.log("myTestChunks:");
6669
console.log(myTestChunks);
70+
console.log(`device: ${device}`);
6771
console.log("length: " + myTestChunks.length);
6872
console.log("trackedTimeSeconds: " + trackedTimeSeconds);

example/example-sentenceit.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ for (const textFile of textFiles) {
3434
});
3535
}
3636

37+
// Get device from command line arguments, default to 'cpu'
38+
const device = process.argv[2] || 'cpu';
39+
3740
// start timing
3841
const startTime = performance.now();
3942

@@ -43,6 +46,7 @@ let myTestSentences = await sentenceit(
4346
logging: false,
4447
onnxEmbeddingModel: "Xenova/all-MiniLM-L6-v2",
4548
dtype: 'fp32',
49+
device: device,
4650
localModelPath: "../models",
4751
modelCacheDir: "../models",
4852
returnEmbedding: true,
@@ -59,5 +63,6 @@ trackedTimeSeconds = parseFloat(trackedTimeSeconds.toFixed(2));
5963
console.log("\n\n\n");
6064
console.log("myTestSentences:");
6165
console.log(myTestSentences);
66+
console.log(`device: ${device}`);
6267
console.log("length: " + myTestSentences.length);
63-
console.log("trackedTimeSeconds: " + trackedTimeSeconds);
68+
console.log("trackedTimeSeconds: " + trackedTimeSeconds);

webui/README.md

Lines changed: 110 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,109 +1,110 @@
1-
# 🍱 Semantic Chunking Web UI
2-
3-
A web-based interface for experimenting with and tuning Semantic Chunking settings. This tool provides a visual way to test and configure the `semantic-chunking` library's settings to get optimal results for your specific use case. Once you've found the best settings, you can generate code to implement them in your project.
4-
5-
## Features
6-
7-
- Real-time text chunking with live preview
8-
- Interactive controls for all chunking parameters
9-
- Visual feedback for similarity thresholds
10-
- Model selection and configuration
11-
- Results download in JSON format
12-
- Code generation for your settings
13-
- Example texts for testing
14-
- Dark mode interface
15-
- Syntax highlighting of JSON results and code samples
16-
- Line wrapping toggle for JSON results
17-
18-
![semantic-chunking_web-ui](../img/semantic-chunking_web-ui.gif)
19-
20-
## Getting Started
21-
22-
### Prerequisites
23-
- Node.js (v18 or higher recommended)
24-
- npm (comes with Node.js)
25-
26-
### Installation
27-
28-
1. Clone the repository:
29-
```bash
30-
git clone https://github.com/jparkerweb/semantic-chunking.git
31-
```
32-
33-
2. Navigate to the webui directory:
34-
```bash
35-
cd semantic-chunking/webui
36-
```
37-
38-
3. Install dependencies:
39-
```bash
40-
npm install
41-
```
42-
43-
4. Start the server:
44-
```bash
45-
npm start
46-
```
47-
48-
5. Open your browser and visit:
49-
```bash
50-
http://localhost:3000
51-
```
52-
53-
## Usage
54-
55-
### Basic Controls
56-
57-
- **Document Name**: Name for your input text
58-
- **Text to Chunk**: Your input text to be processed
59-
- **Max Token Size**: Maximum size for each chunk (50-2500 tokens)
60-
- **Similarity Threshold**: Base threshold for semantic similarity (0.1-1.0)
61-
- **Similarity Sentences Lookahead**: Number of sentences to look ahead when calculating similarity (1-10)
62-
63-
### Advanced Settings
64-
65-
- **Dynamic Threshold Bounds**: Lower and upper bounds for dynamic similarity threshold adjustment
66-
- **Combine Chunks**: Enable/disable chunk combination phase
67-
- **Combine Chunks Similarity Threshold**: Threshold for combining similar chunks
68-
69-
### Model Settings
70-
71-
- **Embedding Model**: Choose from various supported embedding models
72-
- **Quantized Model**: Toggle model quantization for reduced memory usage
73-
74-
### Output Settings
75-
76-
- **Return Token Length**: Include token count in results
77-
- **Return Embedding**: Include embeddings in results
78-
- **Chunk Prefix**: Add prefix to chunks (useful for RAG applications)
79-
- **Exclude Chunk Prefix in Results**: Remove prefix from final results
80-
81-
### Example Texts
82-
83-
Use the provided example texts to test different scenarios:
84-
- `similar.txt`: Text with high semantic similarity between sentences
85-
- `different.txt`: Text with low semantic similarity between sentences
86-
87-
### Results
88-
89-
- View chunked results in real-time
90-
- See chunk count, average token length, and processing time
91-
- Download results as JSON
92-
- Get generated code with your current settings
93-
94-
## Development
95-
96-
The web UI is built with:
97-
- `semantic-chunking` library for text processing
98-
- Express.js for the backend
99-
- Vanilla JavaScript (ES6+) for the frontend
100-
- CSS3 for styling
101-
102-
## License
103-
104-
This project is licensed under the MIT License - see the LICENSE file for details.
105-
106-
## Appreciation
107-
108-
If you enjoy this package please consider sending me a tip to support my work 😀
109-
# [🍵 tip me here](https://ko-fi.com/jparkerweb)
1+
# 🍱 Semantic Chunking Web UI
2+
3+
A web-based interface for experimenting with and tuning Semantic Chunking settings. This tool provides a visual way to test and configure the `semantic-chunking` library's settings to get optimal results for your specific use case. Once you've found the best settings, you can generate code to implement them in your project.
4+
5+
## Features
6+
7+
- Real-time text chunking with live preview
8+
- Interactive controls for all chunking parameters
9+
- Visual feedback for similarity thresholds
10+
- Model selection and configuration
11+
- Results download in JSON format
12+
- Code generation for your settings
13+
- Example texts for testing
14+
- Dark mode interface
15+
- Syntax highlighting of JSON results and code samples
16+
- Line wrapping toggle for JSON results
17+
18+
![semantic-chunking_web-ui](../img/semantic-chunking_web-ui.gif)
19+
20+
## Getting Started
21+
22+
### Prerequisites
23+
- Node.js (v18 or higher recommended)
24+
- npm (comes with Node.js)
25+
26+
### Installation
27+
28+
1. Clone the repository:
29+
```bash
30+
git clone https://github.com/jparkerweb/semantic-chunking.git
31+
```
32+
33+
2. Navigate to the webui directory:
34+
```bash
35+
cd semantic-chunking/webui
36+
```
37+
38+
3. Install dependencies:
39+
```bash
40+
npm install
41+
```
42+
43+
4. Start the server:
44+
```bash
45+
npm start
46+
```
47+
48+
5. Open your browser and visit:
49+
```bash
50+
http://localhost:3000
51+
```
52+
53+
## Usage
54+
55+
### Basic Controls
56+
57+
- **Document Name**: Name for your input text
58+
- **Text to Chunk**: Your input text to be processed
59+
- **Max Token Size**: Maximum size for each chunk (50-2500 tokens)
60+
- **Similarity Threshold**: Base threshold for semantic similarity (0.1-1.0)
61+
- **Similarity Sentences Lookahead**: Number of sentences to look ahead when calculating similarity (1-10)
62+
63+
### Advanced Settings
64+
65+
- **Dynamic Threshold Bounds**: Lower and upper bounds for dynamic similarity threshold adjustment
66+
- **Combine Chunks**: Enable/disable chunk combination phase
67+
- **Combine Chunks Similarity Threshold**: Threshold for combining similar chunks
68+
69+
### Model Settings
70+
71+
- **Embedding Model**: Choose from various supported embedding models
72+
- **DType**: Select the data type for the model, affecting precision and performance (e.g., `fp32`, `fp16`, `q8`).
73+
- **Device**: Choose the processing device (`cpu` or `webgpu`).
74+
75+
### Output Settings
76+
77+
- **Return Token Length**: Include token count in results
78+
- **Return Embedding**: Include embeddings in results
79+
- **Chunk Prefix**: Add prefix to chunks (useful for RAG applications)
80+
- **Exclude Chunk Prefix in Results**: Remove prefix from final results
81+
82+
### Example Texts
83+
84+
Use the provided example texts to test different scenarios:
85+
- `similar.txt`: Text with high semantic similarity between sentences
86+
- `different.txt`: Text with low semantic similarity between sentences
87+
88+
### Results
89+
90+
- View chunked results in real-time
91+
- See chunk count, average token length, and processing time
92+
- Download results as JSON
93+
- Get generated code with your current settings
94+
95+
## Development
96+
97+
The web UI is built with:
98+
- `semantic-chunking` library for text processing
99+
- Express.js for the backend
100+
- Vanilla JavaScript (ES6+) for the frontend
101+
- CSS3 for styling
102+
103+
## License
104+
105+
This project is licensed under the MIT License - see the LICENSE file for details.
106+
107+
## Appreciation
108+
109+
If you enjoy this package, please consider sending me a tip to support my work 😀
110+
# [🍵 tip me here](https://ko-fi.com/jparkerweb)

0 commit comments

Comments
 (0)