Skip to content

Commit 92fd9b6

Browse files
committed
Fix jina clip processor
1 parent f58758a commit 92fd9b6

File tree

4 files changed

+71
-2
lines changed

4 files changed

+71
-2
lines changed
Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,26 @@
1-
import {
1+
import {
22
ImageProcessor,
33
} from "../../base/image_processors_utils.js";
44

5-
export class JinaCLIPImageProcessor extends ImageProcessor {}
5+
/**
 * Image processor for Jina CLIP checkpoints.
 *
 * The checkpoint's preprocessor config uses its own keys (`resize_mode`,
 * `fill_color`, `interpolation` and a scalar `size`), so the constructor
 * rewrites them into the options handed to the base `ImageProcessor`.
 */
export class JinaCLIPImageProcessor extends ImageProcessor {
    /**
     * @param {Object} config Raw preprocessor configuration. `resize_mode`,
     * `fill_color`, `interpolation` and `size` are consumed here; every other
     * key is forwarded to the base class unchanged.
     */
    constructor(config) {
        // `fill_color` is pulled out only so that it is not forwarded with the rest.
        const { resize_mode, fill_color, interpolation, size, ...passthrough } = config;

        // Expand the scalar `size` into an object whose shape depends on the resize mode.
        let target_size;
        switch (resize_mode) {
            case 'squash':
                target_size = { width: size, height: size };
                break;
            case 'shortest':
                target_size = { shortest_edge: size };
                break;
            default:
                target_size = { longest_edge: size };
        }

        // 3 is used for 'bicubic', 2 for anything else
        // (presumably PIL-style resampling ids, i.e. 2 = bilinear; confirm against the base class).
        let resample = 2;
        if (interpolation === 'bicubic') {
            resample = 3;
        }

        // The explicit keys come after the spread, so they win over same-named keys in `config`.
        super({
            ...passthrough,
            size: target_size,
            resample,
            do_center_crop: true,
            crop_size: size,
            do_normalize: true,
        });
    }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
import { Processor } from "../../base/processing_utils.js";
3+
import { AutoImageProcessor } from "../auto/image_processing_auto.js";
4+
import { AutoTokenizer } from "../../tokenizers.js";
5+
6+
/**
 * Processor for Jina CLIP models: tokenizes text and/or preprocesses images
 * and returns the combined model inputs.
 */
export class JinaCLIPProcessor extends Processor {
    static tokenizer_class = AutoTokenizer;
    static image_processor_class = AutoImageProcessor;

    /**
     * Prepare model inputs from text, images, or both.
     *
     * @param {any} [text=null] Text passed to `this.tokenizer`; skipped when falsy.
     * @param {any} [images=null] Image(s) passed to `this.image_processor`; skipped when falsy.
     * @param {Object} [kwargs={}] Options forwarded to both the tokenizer and the image processor.
     * @returns {Promise<Object>} The tokenizer output merged with the image processor output.
     * On a key collision the image processor's value wins, because it is spread last.
     * @throws {Error} If neither `text` nor `images` is provided.
     */
    async _call(text = null, images = null, kwargs = {}) {
        // Reject only when BOTH inputs are missing; text-only and image-only calls are valid.
        if (!text && !images) {
            throw new Error('Either text or images must be provided');
        }

        const text_inputs = text ? this.tokenizer(text, kwargs) : {};
        const image_inputs = images ? await this.image_processor(images, kwargs) : {};

        return {
            ...text_inputs,
            ...image_inputs,
        };
    }
}

src/models/processors.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
export * from './florence2/processing_florence2.js';
22
export * from './mgp_str/processing_mgp_str.js';
33
export * from './janus/processing_janus.js';
4+
export * from './jina_clip/processing_jina_clip.js';
45
export * from './owlvit/processing_owlvit.js';
56
export * from './pyannote/processing_pyannote.js';
67
export * from './qwen2_vl/processing_qwen2_vl.js';

tests/processors.test.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ env.useFSCache = false;
1010
/**
 * Add up every element of `array` and return the total as a Number.
 * A `BigInt64Array` is accumulated starting from `0n` (BigInt arithmetic) and
 * converted at the end; any other input is accumulated starting from `0`.
 */
const sum = (array) => {
    const initial = array instanceof BigInt64Array ? 0n : 0;
    const total = array.reduce((acc, value) => acc + value, initial);
    return Number(total);
};
1111
/**
 * Arithmetic mean of `array`: `sum(array)` divided by `array.length`.
 * For an empty array this is 0 / 0, i.e. NaN.
 */
const avg = (array) => {
    const total = sum(array);
    return total / array.length;
};
1212

13+
/** @type {Map<string, RawImage>} */
1314
const IMAGE_CACHE = new Map();
1415
const load_image = async (url) => {
1516
const cached = IMAGE_CACHE.get(url);
@@ -40,6 +41,7 @@ const MODELS = {
4041
nougat: "Xenova/nougat-small",
4142
owlvit: "Xenova/owlvit-base-patch32",
4243
clip: "Xenova/clip-vit-base-patch16",
44+
jina_clip: "jinaai/jina-clip-v2",
4345
vitmatte: "Xenova/vitmatte-small-distinctions-646",
4446
dinov2: "Xenova/dinov2-small-imagenet1k-1-layer",
4547
// efficientnet: 'Xenova/efficientnet-b0',
@@ -490,6 +492,27 @@ describe("Processors", () => {
490492
MAX_TEST_EXECUTION_TIME,
491493
);
492494

495+
// JinaCLIPImageProcessor
496+
// - custom config overrides
497+
it(
498+
MODELS.jina_clip,
499+
async () => {
500+
const processor = await AutoImageProcessor.from_pretrained(MODELS.jina_clip);
501+
502+
{
503+
const image = await load_image(TEST_IMAGES.tiger);
504+
const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
505+
506+
compare(pixel_values.dims, [1, 3, 512, 512]);
507+
compare(avg(pixel_values.data), -0.06637834757566452);
508+
509+
compare(original_sizes, [[408, 612]]);
510+
compare(reshaped_input_sizes, [[512, 512]]);
511+
}
512+
},
513+
MAX_TEST_EXECUTION_TIME,
514+
);
515+
493516
// VitMatteImageProcessor
494517
// - tests custom overrides
495518
// - tests multiple inputs

0 commit comments

Comments
 (0)