Skip to content

Commit 23647e2

Browse files
Add option to maintain aspect ratio on resize (#971)
* Add option to resize one dimension and maintain aspect ratio.
* Update src/utils/image.js: reduce function calls for optimisation. Co-authored-by: Joshua Lochner <[email protected]>
* Optimise image utils tests.
* A nullish dimension may be `null`, `undefined`, or `-1`.
---------
Co-authored-by: Joshua Lochner <[email protected]>
1 parent d38134d commit 23647e2

File tree

3 files changed

+77
-21
lines changed

3 files changed

+77
-21
lines changed

src/utils/core.js

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11

22
/**
33
* @file Core utility functions/classes for Transformers.js.
4-
*
4+
*
55
* These are only used internally, meaning an end-user shouldn't
66
* need to access anything here.
7-
*
7+
*
88
* @module utils/core
99
*/
1010

@@ -65,7 +65,7 @@ export function escapeRegExp(string) {
6565
* Check if a value is a typed array.
6666
* @param {*} val The value to check.
6767
* @returns {boolean} True if the value is a `TypedArray`, false otherwise.
68-
*
68+
*
6969
* Adapted from https://stackoverflow.com/a/71091338/13989043
7070
*/
7171
export function isTypedArray(val) {
@@ -82,6 +82,15 @@ export function isIntegralNumber(x) {
8282
return Number.isInteger(x) || typeof x === 'bigint'
8383
}
8484

85+
/**
 * Determine if a provided width or height is nullish.
 *
 * A dimension is considered nullish when no concrete size was supplied for it:
 * `null`, `undefined`, and the sentinel value `-1` are all treated the same way.
 * Every other value (including `0`, `NaN`, and the string `'-1'`) is not nullish,
 * because only strict comparisons are used.
 *
 * @param {*} x The value to check.
 * @returns {boolean} True if the value is `null`, `undefined` or `-1`, false otherwise.
 */
export function isNullishDimension(x) {
    // Strict equality on purpose: no coercion, so `'-1'` and `-1n` do not count.
    return x === null || x === undefined || x === -1;
}
93+
8594
/**
8695
* Calculates the dimensions of a nested array.
8796
*
@@ -151,9 +160,9 @@ export function calculateReflectOffset(i, w) {
151160
}
152161

153162
/**
154-
*
155-
* @param {Object} o
156-
* @param {string[]} props
163+
*
164+
* @param {Object} o
165+
* @param {string[]} props
157166
* @returns {Object}
158167
*/
159168
export function pick(o, props) {
@@ -170,7 +179,7 @@ export function pick(o, props) {
170179
/**
171180
* Calculate the length of a string, taking multi-byte characters into account.
172181
* This mimics the behavior of Python's `len` function.
173-
* @param {string} s The string to calculate the length of.
182+
* @param {string} s The string to calculate the length of.
174183
* @returns {number} The length of the string.
175184
*/
176185
export function len(s) {

src/utils/image.js

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11

22
/**
3-
* @file Helper module for image processing.
4-
*
5-
* These functions and classes are only used internally,
3+
* @file Helper module for image processing.
4+
*
5+
* These functions and classes are only used internally,
66
* meaning an end-user shouldn't need to access anything here.
7-
*
7+
*
88
* @module utils/image
99
*/
1010

11+
import { isNullishDimension } from './core.js';
1112
import { getFile } from './hub.js';
1213
import { env } from '../env.js';
1314
import { Tensor } from './tensor.js';
@@ -91,7 +92,7 @@ export class RawImage {
9192
this.channels = channels;
9293
}
9394

94-
/**
95+
/**
9596
* Returns the size of the image (width, height).
9697
* @returns {[number, number]} The size of the image (width, height).
9798
*/
@@ -101,9 +102,9 @@ export class RawImage {
101102

102103
/**
103104
* Helper method for reading an image from a variety of input types.
104-
* @param {RawImage|string|URL} input
105+
* @param {RawImage|string|URL} input
105106
* @returns The image object.
106-
*
107+
*
107108
* **Example:** Read image from a URL.
108109
* ```javascript
109110
* let image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
@@ -181,7 +182,7 @@ export class RawImage {
181182

182183
/**
183184
* Helper method to create a new Image from a tensor
184-
* @param {Tensor} tensor
185+
* @param {Tensor} tensor
185186
*/
186187
static fromTensor(tensor, channel_format = 'CHW') {
187188
if (tensor.dims.length !== 3) {
@@ -306,8 +307,8 @@ export class RawImage {
306307

307308
/**
308309
* Resize the image to the given dimensions. This method uses the canvas API to perform the resizing.
309-
* @param {number} width The width of the new image.
310-
* @param {number} height The height of the new image.
310+
* @param {number} width The width of the new image. If `null`, `undefined`, or `-1`, the width is computed from `height` so that the aspect ratio is preserved.
311+
* @param {number} height The height of the new image. If `null`, `undefined`, or `-1`, the height is computed from `width` so that the aspect ratio is preserved.
311312
* @param {Object} options Additional options for resizing.
312313
* @param {0|1|2|3|4|5|string} [options.resample] The resampling method to use.
313314
* @returns {Promise<RawImage>} `this` to support chaining.
@@ -324,6 +325,20 @@ export class RawImage {
324325
// Ensure resample method is a string
325326
let resampleMethod = RESAMPLING_MAPPING[resample] ?? resample;
326327

328+
// Calculate width / height to maintain aspect ratio, in the event that
329+
// the user passed a null value in.
330+
// This allows users to pass in something like `resize(320, null)` to
331+
// resize to 320 width, but maintain aspect ratio.
332+
const nullish_width = isNullishDimension(width);
333+
const nullish_height = isNullishDimension(height);
334+
if (nullish_width && nullish_height) {
335+
return this;
336+
} else if (nullish_width) {
337+
width = (height / this.height) * this.width;
338+
} else if (nullish_height) {
339+
height = (width / this.width) * this.height;
340+
}
341+
327342
if (BROWSER_ENV) {
328343
// TODO use `resample` in browser environment
329344

@@ -360,7 +375,7 @@ export class RawImage {
360375
case 'nearest':
361376
case 'bilinear':
362377
case 'bicubic':
363-
// Perform resizing using affine transform.
378+
// Perform resizing using affine transform.
364379
// This matches how the python Pillow library does it.
365380
img = img.affine([width / this.width, 0, 0, height / this.height], {
366381
interpolator: resampleMethod
@@ -373,7 +388,7 @@ export class RawImage {
373388
img = img.resize({
374389
width, height,
375390
fit: 'fill',
376-
kernel: 'lanczos3', // PIL Lanczos uses a kernel size of 3
391+
kernel: 'lanczos3', // PIL Lanczos uses a kernel size of 3
377392
});
378393
break;
379394

@@ -452,7 +467,7 @@ export class RawImage {
452467
// Create canvas object for this image
453468
const canvas = this.toCanvas();
454469

455-
// Create a new canvas of the desired size. This is needed since if the
470+
// Create a new canvas of the desired size. This is needed since if the
456471
// image is too small, we need to pad it with black pixels.
457472
const ctx = createCanvasFunction(crop_width, crop_height).getContext('2d');
458473

@@ -500,7 +515,7 @@ export class RawImage {
500515
// Create canvas object for this image
501516
const canvas = this.toCanvas();
502517

503-
// Create a new canvas of the desired size. This is needed since if the
518+
// Create a new canvas of the desired size. This is needed since if the
504519
// image is too small, we need to pad it with black pixels.
505520
const ctx = createCanvasFunction(crop_width, crop_height).getContext('2d');
506521

tests/utils/utils.test.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { AutoProcessor, hamming, hanning, mel_filter_bank } from "../../src/transformers.js";
22
import { getFile } from "../../src/utils/hub.js";
3+
import { RawImage } from "../../src/utils/image.js";
34

45
import { MAX_TEST_EXECUTION_TIME } from "../init.js";
56
import { compare } from "../test_utils.js";
@@ -59,4 +60,35 @@ describe("Utilities", () => {
5960
expect(await data.text()).toBe("Hello, world!");
6061
});
6162
});
63+
64+
describe("Image utilities", () => {
65+
let image;
66+
beforeAll(async () => {
67+
image = await RawImage.fromURL("https://picsum.photos/300/200");
68+
});
69+
70+
it("Read image from URL", async () => {
71+
expect(image.width).toBe(300);
72+
expect(image.height).toBe(200);
73+
expect(image.channels).toBe(3);
74+
});
75+
76+
it("Can resize image", async () => {
77+
const resized = await image.resize(150, 100);
78+
expect(resized.width).toBe(150);
79+
expect(resized.height).toBe(100);
80+
});
81+
82+
it("Can resize with aspect ratio", async () => {
83+
const resized = await image.resize(150, null);
84+
expect(resized.width).toBe(150);
85+
expect(resized.height).toBe(100);
86+
});
87+
88+
it("Returns original image if width and height are null", async () => {
89+
const resized = await image.resize(null, null);
90+
expect(resized.width).toBe(300);
91+
expect(resized.height).toBe(200);
92+
});
93+
});
6294
});

0 commit comments

Comments
 (0)