@@ -2095,7 +2095,7 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
 
 /**
  * @typedef {Object} ImageSegmentationPipelineOutput
- * @property {string} label The label of the segment.
+ * @property {string|null} label The label of the segment.
  * @property {number|null} score The score of the segment.
  * @property {RawImage} mask The mask of the segment.
  *
@@ -2165,14 +2165,30 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
         const preparedImages = await prepareImages(images);
         const imageSizes = preparedImages.map(x => [x.height, x.width]);
 
-        const { pixel_values, pixel_mask } = await this.processor(preparedImages);
-        const output = await this.model({ pixel_values, pixel_mask });
+        const inputs = await this.processor(preparedImages);
+
+        const { inputNames, outputNames } = this.model.sessions['model'];
+        if (!inputNames.includes('pixel_values')) {
+            if (inputNames.length !== 1) {
+                throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
+            }
+
+            const newName = inputNames[0];
+            if (newName in inputs) {
+                throw Error(`Input name ${newName} already exists in the inputs.`);
+            }
+            // To ensure compatibility with certain background-removal models,
+            // we may need to perform a mapping of input to output names
+            inputs[newName] = inputs.pixel_values;
+        }
+
+        const output = await this.model(inputs);
 
         let fn = null;
         if (subtask !== null) {
             fn = this.subtasks_mapping[subtask];
-        } else {
-            for (let [task, func] of Object.entries(this.subtasks_mapping)) {
+        } else if (this.processor.image_processor) {
+            for (const [task, func] of Object.entries(this.subtasks_mapping)) {
                 if (func in this.processor.image_processor) {
                     fn = this.processor.image_processor[func].bind(this.processor.image_processor);
                     subtask = task;
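
For clarity, a minimal standalone sketch of the input-name remapping introduced in the hunk above, using plain objects in place of tensors. The helper name `remapPixelValues` and the sample input name `'input'` are illustrative, not part of the library:

```javascript
// Hypothetical helper mirroring the remapping logic above (names are illustrative).
// Some background-removal ONNX exports expose a single graph input that is not
// called 'pixel_values'; in that case the processor output is also bound under
// the name the session actually expects.
function remapPixelValues(inputs, inputNames) {
    if (!inputNames.includes('pixel_values')) {
        if (inputNames.length !== 1) {
            throw Error(`Expected a single input name, but got ${inputNames.length} inputs: ${inputNames}.`);
        }
        const newName = inputNames[0];
        if (newName in inputs) {
            throw Error(`Input name ${newName} already exists in the inputs.`);
        }
        inputs[newName] = inputs.pixel_values;
    }
    return inputs;
}

// e.g. a model whose only graph input is named 'input':
console.log(Object.keys(remapPixelValues({ pixel_values: 'tensor' }, ['input'])));
// -> ['pixel_values', 'input']
```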
@@ -2186,7 +2202,23 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
 
         /** @type {ImageSegmentationPipelineOutput[]} */
         const annotation = [];
-        if (subtask === 'panoptic' || subtask === 'instance') {
+        if (!subtask) {
+            // Perform standard image segmentation
+            const result = output[outputNames[0]];
+            for (let i = 0; i < imageSizes.length; ++i) {
+                const size = imageSizes[i];
+                const item = result[i];
+                if (item.data.some(x => x < 0 || x > 1)) {
+                    item.sigmoid_();
+                }
+                const mask = await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(size[1], size[0]);
+                annotation.push({
+                    label: null,
+                    score: null,
+                    mask
+                });
+            }
+        } else if (subtask === 'panoptic' || subtask === 'instance') {
             const processed = fn(
                 output,
                 threshold,
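
The new `!subtask` branch above is the path taken by single-mask models that ship no panoptic/instance/semantic post-processor. A hedged sketch of that per-image mask post-processing, pulled out as a helper for illustration (the helper name is hypothetical; `sigmoid_`, `mul_`, `to`, `RawImage.fromTensor`, and `resize` are the same calls used in the hunk):

```javascript
// Hypothetical helper isolating the no-subtask mask post-processing above.
// `item` is assumed to be a single-channel mask tensor for one image, and
// [height, width] the original image size.
async function tensorToMask(item, [height, width]) {
    // Squash logits into [0, 1] only when they are not already probabilities
    if (item.data.some(x => x < 0 || x > 1)) {
        item.sigmoid_();
    }
    // Scale to 0-255, cast to uint8, wrap as an image, and restore the original size
    return await RawImage.fromTensor(item.mul_(255).to('uint8')).resize(width, height);
}
```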
@@ -2242,6 +2274,63 @@ export class ImageSegmentationPipeline extends (/** @type {new (options: ImagePi
     }
 }
 
+
+/**
+ * @typedef {Object} BackgroundRemovalPipelineOptions Parameters specific to background removal pipelines.
+ *
+ * @callback BackgroundRemovalPipelineCallback Remove the background from the input images.
+ * @param {ImagePipelineInputs} images The input images.
+ * @param {BackgroundRemovalPipelineOptions} [options] The options to use for background removal.
+ * @returns {Promise<RawImage[]>} The images with the background removed.
+ *
+ * @typedef {ImagePipelineConstructorArgs & BackgroundRemovalPipelineCallback & Disposable} BackgroundRemovalPipelineType
+ */
+
+/**
+ * Background removal pipeline using certain `AutoModelForXXXSegmentation`.
+ * This pipeline removes the backgrounds of images.
+ *
+ * **Example:** Perform background removal with `Xenova/modnet`.
+ * ```javascript
+ * const segmenter = await pipeline('background-removal', 'Xenova/modnet');
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/portrait-of-woman_small.jpg';
+ * const output = await segmenter(url);
+ * // [
+ * //   RawImage { data: Uint8ClampedArray(648000) [ ... ], width: 360, height: 450, channels: 4 }
+ * // ]
+ * ```
+ */
+export class BackgroundRemovalPipeline extends (/** @type {new (options: ImagePipelineConstructorArgs) => ImageSegmentationPipelineType} */ (ImageSegmentationPipeline)) {
+    /**
+     * Create a new BackgroundRemovalPipeline.
+     * @param {ImagePipelineConstructorArgs} options An object used to instantiate the pipeline.
+     */
+    constructor(options) {
+        super(options);
+    }
+
+    /** @type {BackgroundRemovalPipelineCallback} */
+    async _call(images, options = {}) {
+        const isBatched = Array.isArray(images);
+
+        if (isBatched && images.length !== 1) {
+            throw Error("Background removal pipeline currently only supports a batch size of 1.");
+        }
+
+        const preparedImages = await prepareImages(images);
+
+        // @ts-expect-error TS2339
+        const masks = await super._call(images, options);
+        const result = preparedImages.map((img, i) => {
+            const cloned = img.clone();
+            cloned.putAlpha(masks[i].mask);
+            return cloned;
+        });
+
+        return result;
+    }
+}
+
 /**
  * @typedef {Object} ZeroShotImageClassificationOutput
  * @property {string} label The label identified by the model. It is one of the suggested `candidate_label`.
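
As a usage note for the `BackgroundRemovalPipeline` added in the hunk above: each returned element is a `RawImage` whose alpha channel carries the predicted mask (applied via `putAlpha`), so it can be written straight to a PNG. A hedged sketch, assuming the `@huggingface/transformers` package in a Node environment where `RawImage.save()` is available; the file names are illustrative:

```javascript
import { pipeline } from '@huggingface/transformers';

// Model id matches the JSDoc example above; input/output paths are illustrative.
const remover = await pipeline('background-removal', 'Xenova/modnet');
const [output] = await remover('portrait.jpg');

// Saving as PNG preserves the transparency added by the mask.
await output.save('portrait-transparent.png');
```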
@@ -2554,7 +2643,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
         const output = await this.model({ ...text_inputs, pixel_values });
 
         let result;
-        if ('post_process_grounded_object_detection' in this.processor) {
+        if ('post_process_grounded_object_detection' in this.processor) {
             // @ts-ignore
             const processed = this.processor.post_process_grounded_object_detection(
                 output,
@@ -3134,6 +3223,16 @@ const SUPPORTED_TASKS = Object.freeze({
         },
         "type": "multimodal",
     },
+    "background-removal": {
+        // no tokenizer
+        "pipeline": BackgroundRemovalPipeline,
+        "model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
+        "processor": AutoProcessor,
+        "default": {
+            "model": "Xenova/modnet",
+        },
+        "type": "image",
+    },
 
     "zero-shot-image-classification": {
         "tokenizer": AutoTokenizer,
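
The registration above also declares `Xenova/modnet` as the task default, so (assuming the usual pipeline-factory fallback to the `"default"` entry) the model id can be omitted entirely. A hedged sketch; the import path assumes the `@huggingface/transformers` package and the file name is illustrative:

```javascript
import { pipeline } from '@huggingface/transformers';

// With no explicit model id, the 'background-removal' task should resolve to
// its "default" entry ('Xenova/modnet') declared in SUPPORTED_TASKS above.
const remover = await pipeline('background-removal');
const images = await remover('photo.jpg');
// `images` is an array of RawImage objects with the background made transparent.
```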