diff --git a/src/pipelines.js b/src/pipelines.js index afb627a4a..3f6b73765 100644 --- a/src/pipelines.js +++ b/src/pipelines.js @@ -1,15 +1,15 @@ /** * @file Pipelines provide a high-level, easy-to-use API for running machine learning models. - * + * * **Example:** Instantiate pipeline using the `pipeline` function. * ```javascript * import { pipeline } from '@huggingface/transformers'; - * + * * const classifier = await pipeline('sentiment-analysis'); * const output = await classifier('I love transformers!'); * // [{'label': 'POSITIVE', 'score': 0.999817686}] * ``` - * + * * @module pipelines */ @@ -78,7 +78,7 @@ import { RawImage } from './utils/image.js'; /** - * @typedef {string | RawImage | URL} ImageInput + * @typedef {string | RawImage | URL | Blob | HTMLCanvasElement | OffscreenCanvas} ImageInput * @typedef {ImageInput|ImageInput[]} ImagePipelineInputs */ @@ -152,7 +152,7 @@ function get_bounding_box(box, asInteger) { /** * @callback DisposeType Disposes the item. * @returns {Promise<void>} A promise that resolves when the item has been disposed. - * + * * @typedef {Object} Disposable * @property {DisposeType} dispose A method that disposes the item and returns a promise which resolves when disposal is complete. */ @@ -189,7 +189,7 @@ export class Pipeline extends Callable { * @property {string} task The task of the pipeline. Useful for specifying subtasks. * @property {PreTrainedModel} model The model used by the pipeline. * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline. - * + * * @typedef {ModelTokenizerConstructorArgs} TextPipelineConstructorArgs An object used to instantiate a text-based pipeline. */ @@ -198,7 +198,7 @@ export class Pipeline extends Callable { * @property {string} task The task of the pipeline. Useful for specifying subtasks. * @property {PreTrainedModel} model The model used by the pipeline. * @property {Processor} processor The processor used by the pipeline. - * + * * @typedef {ModelProcessorConstructorArgs} AudioPipelineConstructorArgs An object used to instantiate an audio-based pipeline. * @typedef {ModelProcessorConstructorArgs} ImagePipelineConstructorArgs An object used to instantiate an image-based pipeline. */ @@ -210,7 +210,7 @@ export class Pipeline extends Callable { * @property {string} task The task of the pipeline. Useful for specifying subtasks. * @property {PreTrainedModel} model The model used by the pipeline. * @property {PreTrainedTokenizer} tokenizer The tokenizer used by the pipeline. * @property {Processor} processor The processor used by the pipeline. - * + * * @typedef {ModelTokenizerProcessorConstructorArgs} TextAudioPipelineConstructorArgs An object used to instantiate a text- and audio-based pipeline. * @typedef {ModelTokenizerProcessorConstructorArgs} TextImagePipelineConstructorArgs An object used to instantiate a text- and image-based pipeline. */ @@ -220,15 +220,15 @@ export class Pipeline extends Callable { * @property {string} label The label predicted. * @property {number} score The corresponding probability. * @typedef {TextClassificationSingle[]} TextClassificationOutput - * + * * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines. * @property {number} [top_k=1] The number of top predictions to be returned. - * + * * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs. * @param {string|string[]} texts The input text(s) to be classified. * @param {TextClassificationPipelineOptions} [options] The options to use for text classification. * @returns {Promise<TextClassificationOutput|TextClassificationOutput[]>} An array or object containing the predicted labels and scores. 
- * + * * @typedef {TextPipelineConstructorArgs & TextClassificationPipelineCallback & Disposable} TextClassificationPipelineType */ @@ -241,7 +241,7 @@ export class Pipeline extends Callable { * const output = await classifier('I love transformers!'); * // [{ label: 'POSITIVE', score: 0.999788761138916 }] * ``` - * + * * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes). * ```javascript * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment'); @@ -254,7 +254,7 @@ export class Pipeline extends Callable { * // { label: '2 stars', score: 0.0009423971059732139 } * // ] * ``` - * + * * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes). * ```javascript * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert'); @@ -339,21 +339,21 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi * @property {number} [start] The index of the start of the corresponding entity in the sentence. * @property {number} [end] The index of the end of the corresponding entity in the sentence. * @typedef {TokenClassificationSingle[]} TokenClassificationOutput - * + * * @typedef {Object} TokenClassificationPipelineOptions Parameters specific to token classification pipelines. * @property {string[]} [ignore_labels] A list of labels to ignore. - * + * * @callback TokenClassificationPipelineCallback Classify each token of the text(s) given as inputs. * @param {string|string[]} texts One or several texts (or one list of texts) for token classification. * @param {TokenClassificationPipelineOptions} [options] The options to use for token classification. * @returns {Promise<TokenClassificationOutput|TokenClassificationOutput[]>} The result. - * + * * @typedef {TextPipelineConstructorArgs & TokenClassificationPipelineCallback & Disposable} TokenClassificationPipelineType */ /** * Named Entity Recognition pipeline using any `ModelForTokenClassification`. - * + * * **Example:** Perform named entity recognition with `Xenova/bert-base-NER`. * ```javascript * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER'); @@ -363,7 +363,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi * // { entity: 'B-LOC', score: 0.9994474053382874, index: 9, word: 'London' } * // ] * ``` - * + * * **Example:** Perform named entity recognition with `Xenova/bert-base-NER` (and return all labels). * ```javascript * const classifier = await pipeline('token-classification', 'Xenova/bert-base-NER'); @@ -459,22 +459,22 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP * @property {number} [start] The character start index of the answer (in the tokenized version of the input). * @property {number} [end] The character end index of the answer (in the tokenized version of the input). * @property {string} answer The answer to the question. - * + * * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines. * @property {number} [top_k=1] The number of top answer predictions to be returned. - * + * * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s). * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument). 
 * @param {string|string[]} context One or several context(s) associated with the question(s) (must be used in conjunction with the `question` argument). * @param {QuestionAnsweringPipelineOptions} [options] The options to use for question answering. * @returns {Promise<QuestionAnsweringOutput|QuestionAnsweringOutput[]>} An array or object containing the predicted answers and scores. - * + * * @typedef {TextPipelineConstructorArgs & QuestionAnsweringPipelineCallback & Disposable} QuestionAnsweringPipelineType */ /** * Question Answering pipeline using any `ModelForQuestionAnswering`. - * + * * **Example:** Run question answering with `Xenova/distilbert-base-uncased-distilled-squad`. * ```javascript * const answerer = await pipeline('question-answering', 'Xenova/distilbert-base-uncased-distilled-squad'); @@ -599,10 +599,10 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip * @property {number} token The predicted token id (to replace the masked one). * @property {string} token_str The predicted token (to replace the masked one). * @typedef {FillMaskSingle[]} FillMaskOutput - * + * * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines. * @property {number} [top_k=5] When passed, overrides the number of predictions to return. - * + * * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs. * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens. * @param {FillMaskPipelineOptions} [options] The options to use for masked language modelling. @@ -610,13 +610,13 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip * and the sequence with the predicted token filled in, or an array of such arrays (one for each input text). * If only one input text is given, the output will be an array of objects. * @throws {Error} When the mask token is not found in the input text. - * + * * @typedef {TextPipelineConstructorArgs & FillMaskPipelineCallback & Disposable} FillMaskPipelineType */ /** * Masked language modeling prediction pipeline using any `ModelWithLMHead`. - * + * * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased`. * ```javascript * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased'); @@ -629,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip * // { token_str: 'life', score: 0.01859794743359089, token: 1297, sequence: 'The goal of life is life.' } * // ] * ``` - * + * * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result). * ```javascript * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased'); @@ -706,18 +706,18 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons * @typedef {Object} Text2TextGenerationSingle * @property {string} generated_text The generated text. * @typedef {Text2TextGenerationSingle[]} Text2TextGenerationOutput - * + * * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs. * @param {string|string[]} texts Input text for the encoder. * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model. 
 * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>} - * + * * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType */ /** * Text2TextGenerationPipeline class for generating text using a model that performs text-to-text generation tasks. - * + * * **Example:** Text-to-text generation w/ `Xenova/LaMini-Flan-T5-783M`. * ```javascript * const generator = await pipeline('text2text-generation', 'Xenova/LaMini-Flan-T5-783M'); @@ -793,18 +793,18 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP * @typedef {Object} SummarizationSingle * @property {string} summary_text The summary text. * @typedef {SummarizationSingle[]} SummarizationOutput - * + * * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs. * @param {string|string[]} texts One or several articles (or one list of articles) to summarize. * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<SummarizationOutput|SummarizationOutput[]>} - * + * * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType */ /** * A pipeline for summarization tasks, inheriting from Text2TextGenerationPipeline. - * + * * **Example:** Summarization w/ `Xenova/distilbart-cnn-6-6`. * ```javascript * const generator = await pipeline('summarization', 'Xenova/distilbart-cnn-6-6'); @@ -840,23 +840,23 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin * @typedef {Object} TranslationSingle * @property {string} translation_text The translated text. * @typedef {TranslationSingle[]} TranslationOutput - * + * * @callback TranslationPipelineCallback Translate the text(s) given as inputs. * @param {string|string[]} texts Texts to be translated. * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<TranslationOutput|TranslationOutput[]>} - * + * * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType */ /** * Translates text from one language to another. - * + * * **Example:** Multilingual translation w/ `Xenova/nllb-200-distilled-600M`. - * + * * See [here](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200) * for the full list of languages and their corresponding codes. - * + * * ```javascript * const translator = await pipeline('translation', 'Xenova/nllb-200-distilled-600M'); * const output = await translator('जीवन एक चॉकलेट बॉक्स की तरह है।', { @@ -865,12 +865,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin * }); * // [{ translation_text: 'La vie est comme une boîte à chocolat.' }] * ``` - * + * * **Example:** Multilingual translation w/ `Xenova/m2m100_418M`. - * + * * See [here](https://huggingface.co/facebook/m2m100_418M#languages-covered) * for the full list of languages and their corresponding codes. - * + * * ```javascript * const translator = await pipeline('translation', 'Xenova/m2m100_418M'); * const output = await translator('生活就像一盒巧克力。', { @@ -879,12 +879,12 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin * }); * // [{ translation_text: 'Life is like a box of chocolate.' }] * ``` - * + * * **Example:** Multilingual translation w/ `Xenova/mbart-large-50-many-to-many-mmt`. 
- * + * * See [here](https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt#languages-covered) * for the full list of languages and their corresponding codes. - * + * * ```javascript * const translator = await pipeline('translation', 'Xenova/mbart-large-50-many-to-many-mmt'); * const output = await translator('संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है', { @@ -913,21 +913,21 @@ function isChat(x) { /** * @typedef {import('./tokenizers.js').Message[]} Chat - * + * * @typedef {Object} TextGenerationSingle * @property {string|Chat} generated_text The generated text. * @typedef {TextGenerationSingle[]} TextGenerationOutput - * + * * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines. * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences. * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned. * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig - * + * * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs. * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete. * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts. - * + * * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType */ @@ -935,7 +935,7 @@ function isChat(x) { * Language generation pipeline using any `ModelWithLMHead` or `ModelForCausalLM`. * This pipeline predicts the words that will follow a specified text prompt. * NOTE: For the full list of generation parameters, see [`GenerationConfig`](./utils/generation#module_utils/generation.GenerationConfig). - * + * * **Example:** Text generation with `Xenova/distilgpt2` (default settings). * ```javascript * const generator = await pipeline('text-generation', 'Xenova/distilgpt2'); @@ -943,7 +943,7 @@ function isChat(x) { * const output = await generator(text); * // [{ generated_text: "I enjoy walking with my cute dog, and I love to play with the other dogs." }] * ``` - * + * * **Example:** Text generation with `Xenova/distilgpt2` (custom settings). * ```javascript * const generator = await pipeline('text-generation', 'Xenova/distilgpt2'); @@ -962,7 +962,7 @@ function isChat(x) { * // "generated_text": "Once upon a time, there was an abundance of information about the most important and influential" * // }] * ``` - * + * * **Example:** Run code generation with `Xenova/codegen-350M-mono`. * ```javascript * const generator = await pipeline('text-generation', 'Xenova/codegen-350M-mono'); @@ -1081,7 +1081,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli * @property {string} sequence The sequence for which this is the output. * @property {string[]} labels The labels sorted by order of likelihood. * @property {number[]} scores The probabilities for each of the labels. - * + * * @typedef {Object} ZeroShotClassificationPipelineOptions Parameters specific to zero-shot classification pipelines. * @property {string} [hypothesis_template="This example is {}."] The template used to turn each * candidate label into an NLI-style hypothesis. The candidate label will replace the {} placeholder. 
@@ -1089,14 +1089,14 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli * If `false`, the scores are normalized such that the sum of the label likelihoods for each sequence * is 1. If `true`, the labels are considered independent and probabilities are normalized for each * candidate by doing a softmax of the entailment score vs. the contradiction score. - * + * * @callback ZeroShotClassificationPipelineCallback Classify the sequence(s) given as inputs. * @param {string|string[]} texts The sequence(s) to classify, will be truncated if the model input is too large. * @param {string|string[]} candidate_labels The set of possible class labels to classify each sequence into. * Can be a single label, a string of comma-separated labels, or a list of labels. * @param {ZeroShotClassificationPipelineOptions} [options] The options to use for zero-shot classification. * @returns {Promise<ZeroShotClassificationOutput|ZeroShotClassificationOutput[]>} An array or object containing the predicted labels and scores. - * + * * @typedef {TextPipelineConstructorArgs & ZeroShotClassificationPipelineCallback & Disposable} ZeroShotClassificationPipelineType */ @@ -1105,7 +1105,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli * trained on NLI (natural language inference) tasks. Equivalent of `text-classification` * pipelines, but these models don't require a hardcoded number of potential classes, they * can be chosen at runtime. It usually means it's slower but it is **much** more flexible. - * + * * **Example:** Zero shot classification with `Xenova/mobilebert-uncased-mnli`. * ```javascript * const classifier = await pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli'); @@ -1118,7 +1118,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli * // scores: [ 0.5562091040482018, 0.1843621307860853, 0.13942646639336376, 0.12000229877234923 ] * // } * ``` - * + * * **Example:** Zero shot classification with `Xenova/nli-deberta-v3-xsmall` (multi-label). * ```javascript * const classifier = await pipeline('zero-shot-classification', 'Xenova/nli-deberta-v3-xsmall'); @@ -1232,20 +1232,20 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te * @property {'none'|'mean'|'cls'} [pooling="none"] The pooling method to use. * @property {boolean} [normalize=false] Whether or not to normalize the embeddings in the last dimension. * @property {boolean} [quantize=false] Whether or not to quantize the embeddings. - * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization. - * + * @property {'binary'|'ubinary'} [precision='binary'] The precision to use for quantization. + * * @callback FeatureExtractionPipelineCallback Extract the features of the input(s). * @param {string|string[]} texts One or several texts (or one list of texts) to get the features of. * @param {FeatureExtractionPipelineOptions} [options] The options to use for feature extraction. * @returns {Promise<Tensor>} The features computed by the model. - * + * * @typedef {TextPipelineConstructorArgs & FeatureExtractionPipelineCallback & Disposable} FeatureExtractionPipelineType */ /** * Feature extraction pipeline using no model head. This pipeline extracts the hidden * states from the base transformer, which can be used as features in downstream tasks. - * + * * **Example:** Run feature extraction with `bert-base-uncased` (without pooling/normalization). 
 * ```javascript * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' }); @@ -1256,7 +1256,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te * // dims: [1, 8, 768] * // } * ``` - * + * * **Example:** Run feature extraction with `bert-base-uncased` (with pooling/normalization). * ```javascript * const extractor = await pipeline('feature-extraction', 'Xenova/bert-base-uncased', { revision: 'default' }); @@ -1267,7 +1267,7 @@ export class ZeroShotClassificationPipeline extends (/** @type {new (options: Te * // dims: [1, 768] * // } * ``` - * + * * **Example:** Calculating embeddings with `sentence-transformers` models. * ```javascript * const extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2'); @@ -1348,19 +1348,19 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip /** * @typedef {Object} ImageFeatureExtractionPipelineOptions Parameters specific to image feature extraction pipelines. * @property {boolean} [pool=null] Whether or not to return the pooled output. If set to `false`, the model will return the raw hidden states. - * + * * @callback ImageFeatureExtractionPipelineCallback Extract the features of the input(s). * @param {ImagePipelineInputs} images One or several images (or one list of images) to get the features of. * @param {ImageFeatureExtractionPipelineOptions} [options] The options to use for image feature extraction. * @returns {Promise<Tensor>} The image features computed by the model. - * + * * @typedef {ImagePipelineConstructorArgs & ImageFeatureExtractionPipelineCallback & Disposable} ImageFeatureExtractionPipelineType */ /** * Image feature extraction pipeline using no model head. This pipeline extracts the hidden * states from the base transformer, which can be used as features in downstream tasks. - * + * * **Example:** Perform image feature extraction with `Xenova/vit-base-patch16-224-in21k`. * ```javascript * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/vit-base-patch16-224-in21k'); @@ -1373,7 +1373,7 @@ export class FeatureExtractionPipeline extends (/** @type {new (options: TextPip * // size: 151296 * // } * ``` - * + * * **Example:** Compute image embeddings with `Xenova/clip-vit-base-patch32`. * ```javascript * const image_feature_extractor = await pipeline('image-feature-extraction', 'Xenova/clip-vit-base-patch32'); @@ -1429,12 +1429,12 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im * @property {string} label The label predicted. * @property {number} score The corresponding probability. * @typedef {AudioClassificationSingle[]} AudioClassificationOutput - * + * * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines. * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline. * If the provided number is `null` or higher than the number of labels available in the model configuration, * it will default to the number of labels. - * + * * @callback AudioClassificationPipelineCallback Classify the sequence(s) given as inputs. * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. 
The input is either: * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate @@ -1443,14 +1443,14 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done). * @param {AudioClassificationPipelineOptions} [options] The options to use for audio classification. * @returns {Promise<AudioClassificationOutput|AudioClassificationOutput[]>} An array or object containing the predicted labels and scores. - * + * * @typedef {AudioPipelineConstructorArgs & AudioClassificationPipelineCallback & Disposable} AudioClassificationPipelineType */ /** * Audio classification pipeline using any `AutoModelForAudioClassification`. * This pipeline predicts the class of a raw waveform or an audio file. - * + * * **Example:** Perform audio classification with `Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech`. * ```javascript * const classifier = await pipeline('audio-classification', 'Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech'); @@ -1461,7 +1461,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im * // { label: 'female', score: 0.001845747814513743 } * // ] * ``` - * + * * **Example:** Perform audio classification with `Xenova/ast-finetuned-audioset-10-10-0.4593` and return top 4 results. * ```javascript * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593'); @@ -1526,12 +1526,12 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio * @typedef {Object} ZeroShotAudioClassificationOutput * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`. * @property {number} score The score attributed by the model for that label (between 0 and 1). - * + * * @typedef {Object} ZeroShotAudioClassificationPipelineOptions Parameters specific to zero-shot audio classification pipelines. * @property {string} [hypothesis_template="This is a sound of {}."] The sentence used in conjunction with `candidate_labels` * to attempt the audio classification by replacing the placeholder with the candidate_labels. * Then likelihood is estimated by using `logits_per_audio`. - * + * * @callback ZeroShotAudioClassificationPipelineCallback Classify the sequence(s) given as inputs. * @param {AudioPipelineInputs} audio The input audio file(s) to be classified. The input is either: * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate @@ -1541,14 +1541,14 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done). * @param {string[]} candidate_labels The candidate labels for this audio. * @param {ZeroShotAudioClassificationPipelineOptions} [options] The options to use for zero-shot audio classification. * @returns {Promise<ZeroShotAudioClassificationOutput[]|ZeroShotAudioClassificationOutput[][]>} An array of objects containing the predicted labels and scores. - * + * * @typedef {TextAudioPipelineConstructorArgs & ZeroShotAudioClassificationPipelineCallback & Disposable} ZeroShotAudioClassificationPipelineType */ /** * Zero shot audio classification pipeline using `ClapModel`. This pipeline predicts the class of an audio when you * provide an audio and a set of `candidate_labels`. - * + * * **Example:** Perform zero-shot audio classification with `Xenova/clap-htsat-unfused`. 
 * ```javascript * const classifier = await pipeline('zero-shot-audio-classification', 'Xenova/clap-htsat-unfused'); @@ -1581,7 +1581,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option audio = [/** @type {AudioInput} */ (audio)]; } - // Insert label into hypothesis template + // Insert label into hypothesis template const texts = candidate_labels.map( x => hypothesis_template.replace('{}', x) ); @@ -1625,7 +1625,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * @property {string} text The recognized text. * @property {Chunk[]} [chunks] When using `return_timestamps`, the `chunks` will become a list * containing all the various text chunks identified by the model. - * + * * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines. * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`. * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking). @@ -1635,7 +1635,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected. * @property {number} [num_frames] The number of frames in the input audio. * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig - * + * * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text. * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either: * - `string` or `URL` that is the filename/URL of the audio file, the file will be read at the processor's sampling rate @@ -1644,7 +1644,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done). * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`. - * + * * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType */ @@ -1658,7 +1658,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * const output = await transcriber(url); * // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." } * ``` - * + * * **Example:** Transcribe English w/ timestamps. * ```javascript * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'); @@ -1672,7 +1672,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * // ] * // } * ``` - * + * * **Example:** Transcribe English w/ word-level timestamps. * ```javascript * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'); @@ -1691,7 +1691,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * // ] * // } * ``` - * + * * **Example:** Transcribe French. 
 * ```javascript * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small'); @@ -1699,7 +1699,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * const output = await transcriber(url, { language: 'french', task: 'transcribe' }); * // { text: " J'adore, j'aime, je n'aime pas, je déteste." } * ``` - * + * * **Example:** Translate French to English. * ```javascript * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-small'); @@ -1707,7 +1707,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option * const output = await transcriber(url, { language: 'french', task: 'translate' }); * // { text: " I love, I like, I don't like, I hate." } * ``` - * + * * **Example:** Transcribe/translate audio longer than 30 seconds. * ```javascript * const transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en'); @@ -1930,18 +1930,18 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options * @typedef {Object} ImageToTextSingle * @property {string} generated_text The generated text. * @typedef {ImageToTextSingle[]} ImageToTextOutput - * + * * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs. * @param {ImagePipelineInputs} texts The images to be captioned. * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s). - * + * * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType */ /** * Image To Text pipeline using an `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image. - * + * * **Example:** Generate a caption for an image w/ `Xenova/vit-gpt2-image-captioning`. * ```javascript * const captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning'); @@ -1949,7 +1949,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options * const output = await captioner(url); * // [{ generated_text: 'a cat laying on a couch with another cat' }] * ``` - * + * * **Example:** Optical Character Recognition (OCR) w/ `Xenova/trocr-small-handwritten`. * ```javascript * const captioner = await pipeline('image-to-text', 'Xenova/trocr-small-handwritten'); @@ -1995,22 +1995,22 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe * @property {string} label The label identified by the model. * @property {number} score The score attributed by the model for that label. * @typedef {ImageClassificationSingle[]} ImageClassificationOutput - * + * * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines. - * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline. - * + * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline. + * * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs. * @param {ImagePipelineInputs} images The input image(s) to be classified. * @param {ImageClassificationPipelineOptions} [options] The options to use for image classification. * @returns {Promise<ImageClassificationOutput|ImageClassificationOutput[]>} An array or object containing the predicted labels and scores. 
- * + * * @typedef {ImagePipelineConstructorArgs & ImageClassificationPipelineCallback & Disposable} ImageClassificationPipelineType */ /** * Image classification pipeline using any `AutoModelForImageClassification`. * This pipeline predicts the class of an image. - * + * * **Example:** Classify an image. * ```javascript * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224'); @@ -2020,7 +2020,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe * // { label: 'tiger, Panthera tigris', score: 0.632695734500885 }, * // ] * ``` - * + * * **Example:** Classify an image and return top `n` classes. * ```javascript * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224'); @@ -2032,7 +2032,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe * // { label: 'lion, king of beasts, Panthera leo', score: 0.00045060308184474707 }, * // ] * ``` - * + * * **Example:** Classify an image and return all classes. * ```javascript * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224'); @@ -2099,7 +2099,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image * @property {string|null} label The label of the segment. * @property {number|null} score The score of the segment. * @property {RawImage} mask The mask of the segment. - * + * * @typedef {Object} ImageSegmentationPipelineOptions Parameters specific to image segmentation pipelines. * @property {number} [threshold=0.5] Probability threshold to filter out predicted masks. * @property {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values. @@ -2108,19 +2108,19 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image * depending on model capabilities. If not set, the pipeline will attempt to resolve (in that order). * @property {number[]} [label_ids_to_fuse=null] List of label ids to fuse. If not set, do not fuse any labels. * @property {number[][]} [target_sizes=null] List of target sizes for the input images. If not set, use the original image sizes. - * + * * @callback ImageSegmentationPipelineCallback Segment the input images. * @param {ImagePipelineInputs} images The input images. * @param {ImageSegmentationPipelineOptions} [options] The options to use for image segmentation. * @returns {Promise<ImageSegmentationPipelineOutput[]>} The annotated segments. - * + * * @typedef {ImagePipelineConstructorArgs & ImageSegmentationPipelineCallback & Disposable} ImageSegmentationPipelineType */ /** * Image segmentation pipeline using any `AutoModelForXXXSegmentation`. * This pipeline predicts masks of objects and their classes. - * + * * **Example:** Perform image segmentation with `Xenova/detr-resnet-50-panoptic`. * ```javascript * const segmenter = await pipeline('image-segmentation', 'Xenova/detr-resnet-50-panoptic'); @@ -2278,19 +2278,19 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi /** * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines. - * + * * @callback BackgroundRemovalPipelineCallback Remove the background from the input images. * @param {ImagePipelineInputs} images The input images. * @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal. * @returns {Promise<RawImage[]>} The images with the background removed. 
- * + * * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType */ /** * Background removal pipeline using certain `AutoModelForXXXSegmentation`. * This pipeline removes the backgrounds of images. - * + * * **Example:** Perform background removal with `Xenova/modnet`. * ```javascript * const segmenter = await pipeline('background-removal', 'Xenova/modnet'); @@ -2301,7 +2301,7 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi * // ] * ``` */ -export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) { +export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => BackgroundRemovalPipelineType} */ (/** @type {any} */(ImageSegmentationPipeline))) { /** * Create a new BackgroundRemovalPipeline. * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline. @@ -2336,25 +2336,25 @@ export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePi * @typedef {Object} ZeroShotImageClassificationOutput * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`. * @property {number} score The score attributed by the model for that label (between 0 and 1). - * + * * @typedef {Object} ZeroShotImageClassificationPipelineOptions Parameters specific to zero-shot image classification pipelines. * @property {string} [hypothesis_template="This is a photo of {}"] The sentence used in conjunction with `candidate_labels` * to attempt the image classification by replacing the placeholder with the candidate_labels. * Then likelihood is estimated by using `logits_per_image`. - * + * * @callback ZeroShotImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs. * @param {ImagePipelineInputs} images The input images. * @param {string[]} candidate_labels The candidate labels for this image. * @param {ZeroShotImageClassificationPipelineOptions} [options] The options to use for zero-shot image classification. * @returns {Promise<ZeroShotImageClassificationOutput[]|ZeroShotImageClassificationOutput[][]>} An array of objects containing the predicted labels and scores. - * + * * @typedef {TextImagePipelineConstructorArgs & ZeroShotImageClassificationPipelineCallback & Disposable} ZeroShotImageClassificationPipelineType */ /** * Zero shot image classification pipeline. This pipeline predicts the class of * an image when you provide an image and a set of `candidate_labels`. - * + * * **Example:** Zero shot image classification w/ `Xenova/clip-vit-base-patch32`. * ```javascript * const classifier = await pipeline('zero-shot-image-classification', 'Xenova/clip-vit-base-patch32'); @@ -2384,7 +2384,7 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option const isBatched = Array.isArray(images); const preparedImages = await prepareImages(images); - // Insert label into hypothesis template + // Insert label into hypothesis template const texts = candidate_labels.map( x => hypothesis_template.replace('{}', x) ); @@ -2431,23 +2431,23 @@ export class ZeroShotImageClassificationPipeline extends (/** @type {new (option * @property {number} score The score attributed by the model for that label. * @property {BoundingBox} box The bounding box of detected object in image's original size, or as a percentage if `percentage` is set to true. 
 * @typedef {ObjectDetectionPipelineSingle[]} ObjectDetectionPipelineOutput - * + * * @typedef {Object} ObjectDetectionPipelineOptions Parameters specific to object detection pipelines. * @property {number} [threshold=0.9] The threshold used to filter boxes by score. * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false). - * + * * @callback ObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs. * @param {ImagePipelineInputs} images The input images. * @param {ObjectDetectionPipelineOptions} [options] The options to use for object detection. - * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of lists of objects. - * + * @returns {Promise<ObjectDetectionPipelineOutput|ObjectDetectionPipelineOutput[]>} A list of objects or a list of lists of objects. + * * @typedef {ImagePipelineConstructorArgs & ObjectDetectionPipelineCallback & Disposable} ObjectDetectionPipelineType */ /** * Object detection pipeline using any `AutoModelForObjectDetection`. * This pipeline predicts bounding boxes of objects and their classes. - * + * * **Example:** Run object-detection with `Xenova/detr-resnet-50`. * ```javascript * const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50'); @@ -2521,27 +2521,27 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe * @property {string} label Text query corresponding to the found object. * @property {number} score Score corresponding to the object (between 0 and 1). * @property {BoundingBox} box Bounding box of the detected object in image's original size, or as a percentage if `percentage` is set to true. - * + * * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines. * @property {number} [threshold=0.1] The probability necessary to make a prediction. * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline. * If the provided number is `null` or higher than the number of predictions available, it will default * to the number of predictions. * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false). - * + * * @callback ZeroShotObjectDetectionPipelineCallback Detect objects (bounding boxes & classes) in the image(s) passed as inputs. * @param {ImagePipelineInputs} images The input images. * @param {string[]} candidate_labels What the model should recognize in the image. * @param {ZeroShotObjectDetectionPipelineOptions} [options] The options to use for zero-shot object detection. * @returns {Promise<ZeroShotObjectDetectionOutput[]|ZeroShotObjectDetectionOutput[][]>} An array of objects containing the predicted labels, scores, and bounding boxes. - * + * * @typedef {TextImagePipelineConstructorArgs & ZeroShotObjectDetectionPipelineCallback & Disposable} ZeroShotObjectDetectionPipelineType */ /** * Zero-shot object detection pipeline. This pipeline predicts bounding boxes of * objects when you provide an image and a set of `candidate_labels`. - * + * * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32`. * ```javascript * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32'); @@ -2571,7 +2571,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe * // } * // ] * ``` - * + * * **Example:** Zero-shot object detection w/ `Xenova/owlvit-base-patch32` (returning top 4 matches and setting a threshold). 
 * ```javascript * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32'); @@ -2686,13 +2686,13 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T * @typedef {Object} DocumentQuestionAnsweringSingle * @property {string} answer The generated text. * @typedef {DocumentQuestionAnsweringSingle[]} DocumentQuestionAnsweringOutput - * + * * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document. * @param {ImageInput} image The image of the document to use. * @param {string} question A question to ask of the document. * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model. * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s). - * + * * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType */ @@ -2700,7 +2700,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T * Document Question Answering pipeline using any `AutoModelForDocumentQuestionAnswering`. * The inputs/outputs are similar to the (extractive) question answering pipeline; however, * the pipeline takes an image (and optional OCR'd words/boxes) as input instead of text context. - * + * * **Example:** Answer questions about a document with `Xenova/donut-base-finetuned-docvqa`. * ```javascript * const qa_pipeline = await pipeline('document-question-answering', 'Xenova/donut-base-finetuned-docvqa'); @@ -2770,22 +2770,22 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: * @typedef {Object} TextToAudioOutput * @property {Float32Array} audio The generated audio waveform. * @property {number} sampling_rate The sampling rate of the generated audio waveform. - * + * * @typedef {Object} TextToAudioPipelineOptions Parameters specific to text-to-audio pipelines. * @property {Tensor|Float32Array|string|URL} [speaker_embeddings=null] The speaker embeddings (if the model requires it). - * + * * @callback TextToAudioPipelineCallback Generates speech/audio from the inputs. * @param {string|string[]} texts The text(s) to generate. * @param {TextToAudioPipelineOptions} options Parameters passed to the model generation/forward method. * @returns {Promise<TextToAudioOutput>} An object containing the generated audio and sampling rate. - * + * * @typedef {TextToAudioPipelineConstructorArgs & TextToAudioPipelineCallback & Disposable} TextToAudioPipelineType */ /** * Text-to-audio generation pipeline using any `AutoModelForTextToWaveform` or `AutoModelForTextToSpectrogram`. * This pipeline generates an audio file from an input text and optional other conditional inputs. - * + * * **Example:** Generate audio from text with `Xenova/speecht5_tts`. * ```javascript * const synthesizer = await pipeline('text-to-speech', 'Xenova/speecht5_tts', { quantized: false }); @@ -2796,17 +2796,17 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options: * // sampling_rate: 16000 * // } * ``` - * + * * You can then save the audio to a .wav file with the `wavefile` package: * ```javascript * import wavefile from 'wavefile'; * import fs from 'fs'; - * + * * const wav = new wavefile.WaveFile(); * wav.fromScratch(1, out.sampling_rate, '32f', out.audio); * fs.writeFileSync('out.wav', wav.toBuffer()); * ``` - * + * * **Example:** Multilingual speech generation with `Xenova/mms-tts-fra`. 
See [here](https://huggingface.co/models?pipeline_tag=text-to-speech&other=vits&sort=trending) for the full list of available languages (1107). * ```javascript * const synthesizer = await pipeline('text-to-speech', 'Xenova/mms-tts-fra'); @@ -2912,13 +2912,13 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi * @callback ImageToImagePipelineCallback Transform the image(s) passed as inputs. * @param {ImagePipelineInputs} images The images to transform. * @returns {Promise<RawImage|RawImage[]>} The transformed image or list of images. - * + * * @typedef {ImagePipelineConstructorArgs & ImageToImagePipelineCallback & Disposable} ImageToImagePipelineType */ /** * Image to Image pipeline using any `AutoModelForImageToImage`. This pipeline generates an image based on a previous image input. - * + * * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64` * ```javascript * const upscaler = await pipeline('image-to-image', 'Xenova/swin2SR-classical-sr-x2-64'); @@ -2963,17 +2963,17 @@ export class ImageToImagePipeline extends (/** @type {new (options: ImagePipelin * @typedef {Object} DepthEstimationPipelineOutput * @property {Tensor} predicted_depth The raw depth map predicted by the model. * @property {RawImage} depth The processed depth map as an image (with the same size as the input image). - * + * * @callback DepthEstimationPipelineCallback Predicts the depth for the image(s) passed as inputs. * @param {ImagePipelineInputs} images The images to compute depth for. * @returns {Promise<DepthEstimationPipelineOutput|DepthEstimationPipelineOutput[]>} An image or a list of images containing result(s). - * + * * @typedef {ImagePipelineConstructorArgs & DepthEstimationPipelineCallback & Disposable} DepthEstimationPipelineType */ /** * Depth estimation pipeline using any `AutoModelForDepthEstimation`. This pipeline predicts the depth of an image. - * + * * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas` * ```javascript * const depth_estimator = await pipeline('depth-estimation', 'Xenova/dpt-hybrid-midas'); @@ -3358,7 +3358,7 @@ const TASK_ALIASES = Object.freeze({ /** * Utility factory method to build a `Pipeline` object. - * + * * @template {PipelineType} T The type of pipeline to return. * @param {T} task The task defining which pipeline will be returned. Currently accepted tasks are: * - `"audio-classification"`: will return an `AudioClassificationPipeline`. diff --git a/src/utils/image.js b/src/utils/image.js index 40f51625e..9d1bb3606 100644 --- a/src/utils/image.js +++ b/src/utils/image.js @@ -100,7 +100,7 @@ export class RawImage { /** * Helper method for reading an image from a variety of input types. - * @param {RawImage|string|URL} input + * @param {RawImage|string|URL|Blob|HTMLCanvasElement|OffscreenCanvas} input * @returns The image object. * * **Example:** Read image from a URL. @@ -119,6 +119,14 @@ export class RawImage { return input; } else if (typeof input === 'string' || input instanceof URL) { return await this.fromURL(input); + } else if (input instanceof Blob) { + return await this.fromBlob(input); + } else if ( + (typeof HTMLCanvasElement !== "undefined" && input instanceof HTMLCanvasElement) + || + (typeof OffscreenCanvas !== "undefined" && input instanceof OffscreenCanvas) + ) { + return this.fromCanvas(input); + } else { + throw new Error(`Unsupported input type: ${typeof input}`); }
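Taken together, the two hunks widen the accepted image inputs end to end: the `ImageInput` typedef in `src/pipelines.js` now advertises `Blob`, `HTMLCanvasElement`, and `OffscreenCanvas`, and the new branches in `RawImage.read` actually dispatch them, so every image pipeline picks the change up for free via `prepareImages`. A minimal sketch of how the new input types could be exercised once this patch is applied (the model id is just an example image-classification checkpoint, and the tiger image is one of the sample assets used elsewhere in this file's examples):

```javascript
import { pipeline } from '@huggingface/transformers';

// Example checkpoint, chosen for illustration only; any image-classification
// model should behave the same with the new input types.
const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');

// 1. Blob input, e.g. from fetch() or an <input type="file"> element.
const response = await fetch('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg');
const blob = await response.blob();
const fromBlob = await classifier(blob);

// 2. Canvas input (browser main thread or a worker with OffscreenCanvas).
// Draw the decoded image onto a canvas, then classify the canvas directly.
const canvas = new OffscreenCanvas(224, 224);
const ctx = canvas.getContext('2d');
ctx.drawImage(await createImageBitmap(blob), 0, 0, 224, 224);
const fromCanvas = await classifier(canvas);
```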
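One design note on the `RawImage.read` hunk: `Blob` is tested with a bare `instanceof` (it is a global in browsers and in Node.js v18+), while the two canvas checks are wrapped in `typeof ... !== "undefined"` guards, because an unguarded `input instanceof HTMLCanvasElement` would throw a `ReferenceError` in Node, where that constructor does not exist. The lower-level entry point can also be called directly; a short sketch, where `file` is a hypothetical `File`/`Blob` obtained elsewhere (e.g. from a drag-and-drop handler):

```javascript
import { RawImage } from '@huggingface/transformers';

// `file` is a hypothetical Blob/File from user input; with this patch,
// RawImage.read dispatches it to RawImage.fromBlob.
const image = await RawImage.read(file);
console.log(image.width, image.height); // decoded dimensions
```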