11import { env , AutoProcessor , AutoImageProcessor , RawImage } from "../src/transformers.js" ;
2- import { init , MAX_TEST_EXECUTION_TIME } from "./init.js" ;
2+ import { init , MAX_TEST_TIME } from "./init.js" ;
33import { compare } from "./test_utils.js" ;
44
55// Initialise the testing environment
@@ -99,7 +99,7 @@ describe("Processors", () => {
9999 compare ( avg ( pixel_values . data ) , 0.5 ) ;
100100 }
101101 } ,
102- MAX_TEST_EXECUTION_TIME ,
102+ MAX_TEST_TIME ,
103103 ) ;
104104
105105 // SamProcessor/SamImageProcessor
@@ -171,7 +171,7 @@ describe("Processors", () => {
171171 compare ( input_boxes . tolist ( ) , [ [ [ 0 , 341.3333 , 682.6667 , 682.6667 ] ] ] ) ;
172172 }
173173 } ,
174- MAX_TEST_EXECUTION_TIME ,
174+ MAX_TEST_TIME ,
175175 ) ;
176176
177177 // DonutProcessor/DonutFeatureExtractor
@@ -193,7 +193,7 @@ describe("Processors", () => {
193193 compare ( reshaped_input_sizes , [ [ 1280 , 853 ] ] ) ;
194194 }
195195 } ,
196- MAX_TEST_EXECUTION_TIME ,
196+ MAX_TEST_TIME ,
197197 ) ;
198198
199199 // ConvNextFeatureExtractor
@@ -213,7 +213,7 @@ describe("Processors", () => {
213213 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
214214 }
215215 } ,
216- MAX_TEST_EXECUTION_TIME ,
216+ MAX_TEST_TIME ,
217217 ) ;
218218
219219 // ViTFeatureExtractor
@@ -233,7 +233,7 @@ describe("Processors", () => {
233233 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
234234 }
235235 } ,
236- MAX_TEST_EXECUTION_TIME ,
236+ MAX_TEST_TIME ,
237237 ) ;
238238
239239 // MobileViTFeatureExtractor
@@ -253,7 +253,7 @@ describe("Processors", () => {
253253 compare ( reshaped_input_sizes , [ [ 256 , 256 ] ] ) ;
254254 }
255255 } ,
256- MAX_TEST_EXECUTION_TIME ,
256+ MAX_TEST_TIME ,
257257 ) ;
258258
259259 // MobileViTFeatureExtractor
@@ -275,7 +275,7 @@ describe("Processors", () => {
275275 compare ( reshaped_input_sizes , [ [ 28 , 28 ] ] ) ;
276276 }
277277 } ,
278- MAX_TEST_EXECUTION_TIME ,
278+ MAX_TEST_TIME ,
279279 ) ;
280280
281281 // MobileViTImageProcessor
@@ -299,7 +299,7 @@ describe("Processors", () => {
299299 compare ( pixel_values . data . slice ( 0 , 3 ) , [ 0.24313725531101227 , 0.250980406999588 , 0.364705890417099 ] ) ;
300300 }
301301 } ,
302- MAX_TEST_EXECUTION_TIME ,
302+ MAX_TEST_TIME ,
303303 ) ;
304304
305305 // DeiTFeatureExtractor
@@ -319,7 +319,7 @@ describe("Processors", () => {
319319 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
320320 }
321321 } ,
322- MAX_TEST_EXECUTION_TIME ,
322+ MAX_TEST_TIME ,
323323 ) ;
324324
325325 // BeitFeatureExtractor
@@ -339,7 +339,7 @@ describe("Processors", () => {
339339 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
340340 }
341341 } ,
342- MAX_TEST_EXECUTION_TIME ,
342+ MAX_TEST_TIME ,
343343 ) ;
344344
345345 // DetrFeatureExtractor
@@ -362,7 +362,7 @@ describe("Processors", () => {
362362 compare ( avg ( pixel_mask . data ) , 1 ) ;
363363 }
364364 } ,
365- MAX_TEST_EXECUTION_TIME ,
365+ MAX_TEST_TIME ,
366366 ) ;
367367
368368 // YolosFeatureExtractor
@@ -382,7 +382,7 @@ describe("Processors", () => {
382382 compare ( reshaped_input_sizes , [ [ 888 , 1333 ] ] ) ;
383383 }
384384 } ,
385- MAX_TEST_EXECUTION_TIME ,
385+ MAX_TEST_TIME ,
386386 ) ;
387387
388388 // DPTFeatureExtractor
@@ -403,7 +403,7 @@ describe("Processors", () => {
403403 compare ( reshaped_input_sizes , [ [ 384 , 384 ] ] ) ;
404404 }
405405 } ,
406- MAX_TEST_EXECUTION_TIME ,
406+ MAX_TEST_TIME ,
407407 ) ;
408408
409409 // GLPNForDepthEstimation
@@ -435,7 +435,7 @@ describe("Processors", () => {
435435 compare ( reshaped_input_sizes , [ [ 384 , 608 ] ] ) ;
436436 }
437437 } ,
438- MAX_TEST_EXECUTION_TIME ,
438+ MAX_TEST_TIME ,
439439 ) ;
440440
441441 // NougatImageProcessor
@@ -456,7 +456,7 @@ describe("Processors", () => {
456456 compare ( reshaped_input_sizes , [ [ 833 , 672 ] ] ) ;
457457 }
458458 } ,
459- MAX_TEST_EXECUTION_TIME ,
459+ MAX_TEST_TIME ,
460460 ) ;
461461
462462 // OwlViTFeatureExtractor
@@ -492,7 +492,7 @@ describe("Processors", () => {
492492 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
493493 }
494494 } ,
495- MAX_TEST_EXECUTION_TIME ,
495+ MAX_TEST_TIME ,
496496 ) ;
497497
498498 // JinaCLIPImageProcessor
@@ -513,7 +513,7 @@ describe("Processors", () => {
513513 compare ( reshaped_input_sizes , [ [ 512 , 512 ] ] ) ;
514514 }
515515 } ,
516- MAX_TEST_EXECUTION_TIME ,
516+ MAX_TEST_TIME ,
517517 ) ;
518518
519519 // VitMatteImageProcessor
@@ -564,7 +564,7 @@ describe("Processors", () => {
564564 compare ( reshaped_input_sizes , [ [ 5 , 3 ] ] ) ;
565565 }
566566 } ,
567- MAX_TEST_EXECUTION_TIME ,
567+ MAX_TEST_TIME ,
568568 ) ;
569569
570570 // BitImageProcessor
@@ -584,7 +584,7 @@ describe("Processors", () => {
584584 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
585585 }
586586 } ,
587- MAX_TEST_EXECUTION_TIME ,
587+ MAX_TEST_TIME ,
588588 ) ;
589589
590590 // DPTImageProcessor
@@ -619,7 +619,7 @@ describe("Processors", () => {
619619 compare ( reshaped_input_sizes , [ [ 252 , 518 ] ] ) ;
620620 }
621621 } ,
622- MAX_TEST_EXECUTION_TIME ,
622+ MAX_TEST_TIME ,
623623 ) ;
624624
625625 // TODO: Add back
@@ -638,7 +638,7 @@ describe("Processors", () => {
638638 // compare(original_sizes, [[480, 640]]);
639639 // compare(reshaped_input_sizes, [[224, 224]]);
640640 // }
641- // }, MAX_TEST_EXECUTION_TIME );
641+ // }, MAX_TEST_TIME );
642642
643643 // Qwen2VLImageProcessor
644644 // - custom image processing (min_pixels, max_pixels)
@@ -659,7 +659,7 @@ describe("Processors", () => {
659659 compare ( reshaped_input_sizes , [ [ 224 , 224 ] ] ) ;
660660 }
661661 } ,
662- MAX_TEST_EXECUTION_TIME ,
662+ MAX_TEST_TIME ,
663663 ) ;
664664
665665 // Idefics3ImageProcessor
@@ -670,9 +670,12 @@ describe("Processors", () => {
670670 const processor = await AutoImageProcessor . from_pretrained ( MODELS . idefics3 ) ;
671671
672672 const image = await load_image ( TEST_IMAGES . gradient_1280x640 ) ;
673-
674673 const image_1 = await image . resize ( 1600 , 1067 ) ;
675674 const image_2 = await image . resize ( 224 , 224 ) ;
675+
676+ const white_image = await load_image ( TEST_IMAGES . white_image ) ;
677+ const white_image_1 = await white_image . resize ( 1600 , 1067 ) ;
678+ const white_image_2 = await white_image . resize ( 224 , 224 ) ;
676679
677680 {
678681 // test no image splitting
@@ -687,6 +690,29 @@ describe("Processors", () => {
687690 compare ( cols , [ [ 0 ] ] ) ;
688691 }
689692
693+ {
694+ // test batched no image splitting
695+ const { pixel_values, pixel_attention_mask, rows, cols } = await processor ( [
696+ [ white_image_1 ] ,
697+ [ white_image_2 ] ,
698+ [ white_image_1 , white_image_2 ] ,
699+ ] , { do_image_splitting : false , return_row_col_info : true } ) ;
700+ compare ( pixel_values . dims , [ 3 , 2 , 3 , 364 , 364 ] ) ;
701+ compare (
702+ pixel_values . mean ( ) . item ( ) ,
703+ 2 / 3 ,
704+ 0.01 , // threshold
705+ ) ;
706+ compare ( pixel_attention_mask . dims , [ 3 , 2 , 364 , 364 ] ) ;
707+ compare (
708+ pixel_attention_mask . mean ( ) . item ( ) ,
709+ 2 / 3 ,
710+ 0.001 , // threshold
711+ ) ;
712+ compare ( rows , [ [ 0 ] , [ 0 ] , [ 0 , 0 ] ] ) ;
713+ compare ( cols , [ [ 0 ] , [ 0 ] , [ 0 , 0 ] ] ) ;
714+ }
715+
690716 {
691717 // test correct patching
692718 const { pixel_values, rows, cols } = await processor ( image , { return_row_col_info : true } ) ;
@@ -718,18 +744,18 @@ describe("Processors", () => {
718744 compare ( cols , [ [ 4 , 4 ] ] ) ;
719745 }
720746
721- // TODO:
722- // { // batched, multiple images
723- // const { pixel_values, rows, cols } = await processor([
724- // [image_1],
725- // [image_1, image_2],
726- // ], { return_row_col_info: true });
727- // compare(pixel_values.dims, [2, 30, 3, 364, 364]);
728- // compare(rows, [[3], [3, 4]]);
729- // compare(cols, [[4], [4, 4]]);
730- // }
747+ {
748+ // batched, multiple images
749+ const { pixel_values, rows, cols } = await processor ( [
750+ [ image_1 ] ,
751+ [ image_1 , image_2 ] ,
752+ ] , { return_row_col_info : true } ) ;
753+ compare ( pixel_values . dims , [ 2 , 30 , 3 , 364 , 364 ] ) ;
754+ compare ( rows , [ [ 3 ] , [ 3 , 4 ] ] ) ;
755+ compare ( cols , [ [ 4 ] , [ 4 , 4 ] ] ) ;
756+ }
731757 } ,
732- MAX_TEST_EXECUTION_TIME ,
758+ MAX_TEST_TIME ,
733759 ) ;
734760 } ) ;
735761
@@ -754,7 +780,7 @@ describe("Processors", () => {
754780 expect ( input_features . data [ 81 ] ) . toBeCloseTo ( 0.10727232694625854 ) ;
755781 expect ( input_features . data [ 3001 ] ) . toBeCloseTo ( 0.2555035352706909 ) ;
756782 } ,
757- MAX_TEST_EXECUTION_TIME ,
783+ MAX_TEST_TIME ,
758784 ) ;
759785
760786 it (
@@ -789,7 +815,7 @@ describe("Processors", () => {
789815 expect ( input_values . data [ 10000 ] ) . toBeCloseTo ( 0.46703237295150757 ) ;
790816 }
791817 } ,
792- MAX_TEST_EXECUTION_TIME ,
818+ MAX_TEST_TIME ,
793819 ) ;
794820
795821 it (
@@ -830,7 +856,7 @@ describe("Processors", () => {
830856 expect ( sum ( attention_mask . data ) ) . toEqual ( 30 ) ;
831857 }
832858 } ,
833- MAX_TEST_EXECUTION_TIME ,
859+ MAX_TEST_TIME ,
834860 ) ;
835861
836862 it (
@@ -883,7 +909,7 @@ describe("Processors", () => {
883909 expect ( input_features . data [ 64063 ] ) . toBeCloseTo ( - 100.0 ) ;
884910 }
885911 } ,
886- MAX_TEST_EXECUTION_TIME ,
912+ MAX_TEST_TIME ,
887913 ) ;
888914
889915 it (
@@ -922,7 +948,7 @@ describe("Processors", () => {
922948 expect ( input_features . data . at ( - 1 ) ) . toBeCloseTo ( - 2.2504329681396484 ) ;
923949 }
924950 } ,
925- MAX_TEST_EXECUTION_TIME ,
951+ MAX_TEST_TIME ,
926952 ) ;
927953 } ) ;
928954
@@ -1132,7 +1158,7 @@ describe("Processors", () => {
11321158 }
11331159 } ) ;
11341160 } ,
1135- MAX_TEST_EXECUTION_TIME ,
1161+ MAX_TEST_TIME ,
11361162 ) ;
11371163
11381164 describe (
@@ -1168,7 +1194,7 @@ describe("Processors", () => {
11681194 compare ( image_grid_thw . dims , [ 1 , 3 ] ) ;
11691195 } ) ;
11701196 } ,
1171- MAX_TEST_EXECUTION_TIME ,
1197+ MAX_TEST_TIME ,
11721198 ) ;
11731199 } ) ;
11741200} ) ;
0 commit comments