Skip to content

Commit 9556591

Browse files
committed
Update FeatureExtractionPipeline types and default
1 parent 59ef9a6 commit 9556591

File tree

2 files changed

+25
-23
lines changed

2 files changed

+25
-23
lines changed

src/pipelines.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,8 @@ const SUPPORTED_TASKS = Object.freeze({
343343
pipeline: FeatureExtractionPipeline,
344344
model: AutoModel,
345345
default: {
346-
// TODO: replace with original
347-
// "model": "sentence-transformers/all-MiniLM-L6-v2",
348-
model: 'Xenova/all-MiniLM-L6-v2',
346+
model: 'onnx-community/all-MiniLM-L6-v2-ONNX',
347+
dtype: 'fp32',
349348
},
350349
type: 'text',
351350
},

src/pipelines/feature-extraction.js

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,47 +26,50 @@ import { Tensor, mean_pooling, quantize_embeddings } from '../utils/tensor.js';
2626
* Feature extraction pipeline using no model head. This pipeline extracts the hidden
2727
* states from the base transformer, which can be used as features in downstream tasks.
2828
*
29-
* **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization).
29+
* **Example:** Run feature extraction using `onnx-community/all-MiniLM-L6-v2-ONNX` (without pooling or normalization).
3030
* ```javascript
31-
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
31+
* import { pipeline } from '@huggingface/transformers';
32+
*
33+
* const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX');
3234
* const output = await extractor('This is a simple test.');
3335
* // Tensor {
3436
* // type: 'float32',
35-
* // data: Float32Array [0.05939924716949463, 0.021655935794115067, ...],
36-
* // dims: [1, 8, 768]
37+
* // data: Float32Array [0.2157987803220749, -0.09140099585056305, ...],
38+
* // dims: [1, 8, 384]
3739
* // }
38-
* ```
3940
*
40-
* **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization).
41-
* ```javascript
42-
* const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' });
43-
* const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true });
44-
* // Tensor {
45-
* // type: 'float32',
46-
* // data: Float32Array [0.03373778983950615, -0.010106077417731285, ...],
47-
* // dims: [1, 768]
48-
* // }
41+
* // You can convert this Tensor to a nested JavaScript array using `.tolist()`:
42+
* console.log(output.tolist());
4943
* ```
5044
*
51-
* **Example:** Calculating embeddings with `sentence-transformers` models.
45+
* **Example:** Run feature extraction using `onnx-community/all-MiniLM-L6-v2-ONNX` (with pooling and normalization).
5246
* ```javascript
53-
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
47+
* import { pipeline } from '@huggingface/transformers';
48+
*
49+
* const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX');
5450
* const output = await extractor('This is a simple test.', { pooling: 'mean', normalize: true });
5551
* // Tensor {
5652
* // type: 'float32',
57-
* // data: Float32Array [0.09094982594251633, -0.014774246141314507, ...],
53+
* // data: Float32Array [0.09528215229511261, -0.024730168282985687, ...],
5854
* // dims: [1, 384]
5955
* // }
56+
*
57+
* // You can convert this Tensor to a nested JavaScript array using `.tolist()`:
58+
* console.log(output.tolist());
6059
* ```
61-
* **Example:** Calculating binary embeddings with `sentence-transformers` models.
60+
*
61+
* **Example:** Run feature extraction using `onnx-community/all-MiniLM-L6-v2-ONNX` (with pooling and binary quantization).
6262
* ```javascript
63-
* const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
63+
* const extractor = await pipeline('feature-extraction', 'onnx-community/all-MiniLM-L6-v2-ONNX');
6464
* const output = await extractor('This is a simple test.', { pooling: 'mean', quantize: true, precision: 'binary' });
6565
* // Tensor {
6666
* // type: 'int8',
67-
* // data: Int8Array [49, 108, 24, ...],
67+
* // data: Int8Array [49, 108, 25, ...],
6868
* // dims: [1, 48]
6969
* // }
70+
*
71+
* // You can convert this Tensor to a nested JavaScript array using `.tolist()`:
72+
* console.log(output.tolist());
7073
* ```
7174
*/
7275
export class FeatureExtractionPipeline

0 commit comments

Comments
 (0)