Skip to content

Commit 2710212

Browse files
authored
chore(shared): add localImg2Base64 method (#979)
* chore(core): remove unused elementSnapshot * chore(shared): remove unused code * chore(shared): refactor localImg2Base64 * chore(shared): log content-type when url is not an image
1 parent 698c7f9 commit 2710212

File tree

11 files changed

+89
-157
lines changed

11 files changed

+89
-157
lines changed

packages/core/src/image/index.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
export {
22
imageInfo,
33
imageInfoOfBase64,
4-
base64Encoded,
4+
localImg2Base64,
5+
httpImg2Base64,
56
resizeImg,
6-
transformImgPathToBase64,
77
saveBase64Image,
88
zoomForGPT4o,
99
} from '@midscene/shared/img';

packages/core/tests/ai/connectivity.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { existsSync } from 'node:fs';
22
import path from 'node:path';
33
import { AIActionType } from '@/ai-model/common';
44
import { call, callToGetJSONObject } from '@/ai-model/service-caller';
5-
import { base64Encoded } from '@/image';
5+
import { localImg2Base64 } from '@/image';
66
import dotenv from 'dotenv';
77
import { getFixture } from 'tests/utils';
88
import { beforeAll, describe, expect, it, vi } from 'vitest';
@@ -121,7 +121,7 @@ vi.setConfig({
121121
{
122122
type: 'image_url',
123123
image_url: {
124-
url: base64Encoded(imagePath),
124+
url: localImg2Base64(imagePath),
125125
detail: 'high',
126126
},
127127
},

packages/core/tests/ai/streaming.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { existsSync } from 'node:fs';
22
import path from 'node:path';
33
import { AIActionType } from '@/ai-model/common';
44
import { call } from '@/ai-model/service-caller';
5-
import { base64Encoded } from '@/image';
5+
import { localImg2Base64 } from '@/image';
66
import type { AIUsageInfo, CodeGenerationChunk } from '@/types';
77
import dotenv from 'dotenv';
88
import { getFixture } from 'tests/utils';
@@ -104,7 +104,7 @@ describe(
104104
{
105105
type: 'image_url',
106106
image_url: {
107-
url: base64Encoded(imagePath),
107+
url: localImg2Base64(imagePath),
108108
detail: 'high',
109109
},
110110
},

packages/core/tests/evaluation.ts

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { existsSync, readFileSync } from 'node:fs';
22
import path from 'node:path';
33
import { describeUserPage } from '@/index';
44
import { vlLocateMode } from '@midscene/shared/env';
5-
import { base64Encoded, imageInfoOfBase64 } from '@midscene/shared/img';
5+
import { imageInfoOfBase64, localImg2Base64 } from '@midscene/shared/img';
66

77
export async function buildContext(targetDir: string): Promise<{
88
context: {
@@ -23,7 +23,7 @@ export async function buildContext(targetDir: string): Promise<{
2323
targetDir,
2424
existsSync(path.join(targetDir, 'input.png')) ? 'input.png' : 'input.jpeg',
2525
);
26-
const originalScreenshotBase64 = base64Encoded(originalInputImgP);
26+
const originalScreenshotBase64 = localImg2Base64(originalInputImgP);
2727

2828
const resizeOutputImgP = path.join(targetDir, 'output_without_text.png');
2929
const snapshotJsonPath = path.join(targetDir, 'element-snapshot.json');
@@ -58,13 +58,12 @@ export async function buildContext(targetDir: string): Promise<{
5858
}
5959

6060
const snapshotJson = readFileSync(snapshotJsonPath, { encoding: 'utf-8' });
61-
const elementSnapshot = JSON.parse(snapshotJson);
6261
const elementTree = JSON.parse(
6362
readFileSync(elementTreeJsonPath, { encoding: 'utf-8' }),
6463
);
6564
const screenshotBase64 = vlLocateMode()
6665
? originalScreenshotBase64
67-
: base64Encoded(resizeOutputImgP);
66+
: localImg2Base64(resizeOutputImgP);
6867

6968
const size = await imageInfoOfBase64(screenshotBase64);
7069
const baseContext = {

packages/core/tests/utils.ts

Lines changed: 4 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
import { readFileSync, writeFileSync } from 'node:fs';
2-
import path, { join } from 'node:path';
1+
import { writeFileSync } from 'node:fs';
2+
import { join } from 'node:path';
33
import type { callAiFn } from '@/ai-model/common';
4-
import {
5-
base64Encoded,
6-
imageInfoOfBase64,
7-
transformImgPathToBase64,
8-
} from '@/image';
4+
import { localImg2Base64 } from '@/image';
95
import Insight from '@/insight';
106
import type { AIElementLocatorResponse, BaseElement, UIContext } from '@/types';
117
import { NodeType } from '@midscene/shared/constants';
@@ -31,7 +27,7 @@ export function sleep(ms: number) {
3127
export function fakeInsight(content: string) {
3228
const screenshot = getFixture('baidu.png');
3329
const basicContext = {
34-
screenshotBase64: base64Encoded(screenshot),
30+
screenshotBase64: localImg2Base64(screenshot),
3531
size: { width: 1920, height: 1080 },
3632
content: [
3733
{
@@ -86,30 +82,3 @@ export function fakeInsight(content: string) {
8682

8783
return insight;
8884
}
89-
90-
export function generateUIContext(testDataPath: string) {
91-
return async () => {
92-
const screenshotBase64 = await transformImgPathToBase64(
93-
path.join(testDataPath, 'input.png'),
94-
);
95-
const size = await imageInfoOfBase64(screenshotBase64);
96-
97-
const captureElementSnapshot = readFileSync(
98-
path.join(testDataPath, 'element-snapshot.json'),
99-
'utf-8',
100-
);
101-
102-
// align element
103-
const elementsInfo = JSON.parse(captureElementSnapshot) as BaseElement[];
104-
105-
const baseContext = {
106-
size: { width: size.width, height: size.height },
107-
content: elementsInfo,
108-
screenshotBase64: `data:image/png;base64,${screenshotBase64}`,
109-
};
110-
111-
return {
112-
...baseContext,
113-
};
114-
};
115-
}

packages/evaluation/tests/util.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ import type { PlanningAIResponse, Rect } from '@midscene/core';
44
import { NodeType } from '@midscene/shared/constants';
55
import { vlLocateMode } from '@midscene/shared/env';
66
import {
7-
base64Encoded,
87
compositeElementInfoImg,
98
imageInfoOfBase64,
9+
localImg2Base64,
1010
} from '@midscene/shared/img';
1111
import { parseContextFromWebPage } from '@midscene/web';
1212

@@ -197,7 +197,7 @@ export function getCases(
197197
}
198198

199199
export async function buildContextByImage(imagePath: string) {
200-
const screenshotBase64 = base64Encoded(imagePath);
200+
const screenshotBase64 = localImg2Base64(imagePath);
201201
const size = await imageInfoOfBase64(screenshotBase64);
202202

203203
const fakePage = {
@@ -221,7 +221,7 @@ export async function buildContextByImage(imagePath: string) {
221221
export async function buildContext(pageName: string) {
222222
const targetDir = path.join(__dirname, '../page-data/', pageName);
223223
const screenshotBase64Path = path.join(targetDir, 'input.png');
224-
const screenshotBase64 = base64Encoded(screenshotBase64Path);
224+
const screenshotBase64 = localImg2Base64(screenshotBase64Path);
225225
const size = await imageInfoOfBase64(screenshotBase64);
226226

227227
const fakePage = {

packages/shared/src/img/index.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,20 @@ export {
22
imageInfo,
33
imageInfoOfBase64,
44
bufferFromBase64,
5-
base64Encoded,
65
isValidPNGImageBuffer,
76
} from './info';
87
export {
9-
trimImage,
108
resizeImg,
119
resizeImgBase64,
12-
transformImgPathToBase64,
1310
zoomForGPT4o,
1411
saveBase64Image,
1512
paddingToMatchBlock,
1613
paddingToMatchBlockByBase64,
1714
cropByRect,
1815
jimpFromBase64,
1916
jimpToBase64,
17+
localImg2Base64,
18+
httpImg2Base64,
2019
} from './transform';
2120
export { processImageElementInfo, compositeElementInfoImg } from './box-select';
2221
export { drawBoxOnImage, savePositionImg } from './draw-box';

packages/shared/src/img/info.ts

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import assert from 'node:assert';
22
import { Buffer } from 'node:buffer';
3-
import { readFileSync } from 'node:fs';
43
import type Jimp from 'jimp';
54
import getJimp from './get-jimp';
65

@@ -64,31 +63,6 @@ export async function bufferFromBase64(imageBase64: string): Promise<Buffer> {
6463
return Buffer.from(base64Data, 'base64');
6564
}
6665

67-
/**
68-
* Encodes an image file to a base64 encoded string
69-
*
70-
* @param image The path of the image file
71-
* @param withHeader Determine whether to return data including the file header information, the default is true
72-
*
73-
* @returns The base64 encoded string of the image file, which may or may not include header information depending on the withHeader parameter
74-
*
75-
* @throws When the image type is not supported, an error will be thrown
76-
*/
77-
export function base64Encoded(image: string, withHeader = true) {
78-
// get base64 encoded image
79-
const imageBuffer = readFileSync(image);
80-
if (!withHeader) {
81-
return imageBuffer.toString('base64');
82-
}
83-
if (image.endsWith('png')) {
84-
return `data:image/png;base64,${imageBuffer.toString('base64')}`;
85-
}
86-
if (image.endsWith('jpg') || image.endsWith('jpeg')) {
87-
return `data:image/jpeg;base64,${imageBuffer.toString('base64')}`;
88-
}
89-
throw new Error('unsupported image type');
90-
}
91-
9266
/**
9367
* Check if the Buffer is a valid PNG image
9468
* @param buffer The Buffer to check

packages/shared/src/img/transform.ts

Lines changed: 41 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import assert from 'node:assert';
22
import { Buffer } from 'node:buffer';
3-
3+
import { readFileSync } from 'node:fs';
44
import getDebug from 'debug';
55
import type Jimp from 'jimp';
66
import type { Rect } from 'src/types';
77
import getJimp from './get-jimp';
88
const debugImg = getDebug('img');
9+
import path from 'node:path';
10+
911
/**
1012
/**
1113
* Saves a Base64-encoded image to a file
@@ -34,22 +36,6 @@ export async function saveBase64Image(options: {
3436
debugImg(`saveBase64Image done: ${options.outputPath}`);
3537
}
3638

37-
/**
38-
* Transforms an image path into a base64-encoded string
39-
* @param inputPath - The path of the image file to be encoded
40-
* @returns A Promise that resolves to a base64-encoded string representing the image file
41-
*/
42-
export async function transformImgPathToBase64(inputPath: string) {
43-
// Use Jimp to process images and generate base64 data
44-
debugImg(`transformImgPathToBase64 start: ${inputPath}`);
45-
const Jimp = await getJimp();
46-
const image = await Jimp.read(inputPath);
47-
const buffer = await image.getBufferAsync(Jimp.MIME_JPEG);
48-
const res = buffer.toString('base64');
49-
debugImg(`transformImgPathToBase64 done: ${inputPath}`);
50-
return res;
51-
}
52-
5339
/**
5440
* Resizes an image from a base64-encoded string
5541
*
@@ -168,50 +154,6 @@ export function zoomForGPT4o(originalWidth: number, originalHeight: number) {
168154
};
169155
}
170156

171-
/**
172-
* Trims an image and returns the trimming information, including the offset from the left and top edges, and the trimmed width and height
173-
*
174-
* @param image - The image to be trimmed. This can be a file path or a Buffer object containing the image data
175-
* @returns A Promise that resolves to an object containing the trimming information. If the image does not need to be trimmed, this object will be null
176-
*/
177-
export async function trimImage(image: string | Buffer): Promise<{
178-
trimOffsetLeft: number; // attention: trimOffsetLeft is a negative number
179-
trimOffsetTop: number; // so as trimOffsetTop
180-
width: number;
181-
height: number;
182-
} | null> {
183-
const Jimp = await getJimp();
184-
const jimpImage = await Jimp.read(
185-
Buffer.isBuffer(image) ? image : Buffer.from(image),
186-
);
187-
const { width, height } = jimpImage.bitmap;
188-
189-
if (width <= 3 || height <= 3) {
190-
return null;
191-
}
192-
193-
const trimmedImage = jimpImage.autocrop();
194-
const { width: trimmedWidth, height: trimmedHeight } = trimmedImage.bitmap;
195-
196-
const trimOffsetLeft = (width - trimmedWidth) / 2;
197-
const trimOffsetTop = (height - trimmedHeight) / 2;
198-
199-
if (trimOffsetLeft === 0 && trimOffsetTop === 0) {
200-
return null;
201-
}
202-
203-
return {
204-
trimOffsetLeft: -trimOffsetLeft,
205-
trimOffsetTop: -trimOffsetTop,
206-
width: trimmedWidth,
207-
height: trimmedHeight,
208-
};
209-
}
210-
211-
export function prependBase64Header(base64: string, mimeType = 'image/png') {
212-
return `data:${mimeType};base64,${base64}`;
213-
}
214-
215157
export async function jimpFromBase64(base64: string): Promise<Jimp> {
216158
const Jimp = await getJimp();
217159
const imageBuffer = await bufferFromBase64(base64);
@@ -269,3 +211,41 @@ export async function jimpToBase64(image: Jimp): Promise<string> {
269211
const Jimp = await getJimp();
270212
return image.getBase64Async(Jimp.MIME_JPEG);
271213
}
214+
215+
/**
 * Downloads an image over HTTP(S) and returns it as a base64 data URL
 * (`data:<mime-type>;base64,<payload>`).
 *
 * The MIME type is taken from the response's `content-type` header. Any
 * parameters on that header (e.g. `; charset=binary`) are dropped and the
 * type is lower-cased, so the resulting data URL header is always the bare
 * `data:image/<subtype>;base64,` form.
 *
 * @param url - Address of the image to download.
 * @returns The image encoded as a data URL.
 * @throws Error when the request is not successful or the response carries
 *   no `content-type` header.
 * @throws AssertionError when the `content-type` is not an `image/*` type.
 */
export const httpImg2Base64 = async (url: string): Promise<string> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Failed to fetch image: ${url} (HTTP status ${response.status})`,
    );
  }

  const contentType = response.headers.get('content-type');
  if (!contentType) {
    throw new Error(
      `Failed to fetch image: ${url} (response has no content-type header)`,
    );
  }

  // Media types are case-insensitive and may carry parameters after ';'.
  // Only the bare type/subtype belongs in front of ';base64,'.
  const mimeType = (contentType.split(';')[0] ?? '').trim().toLowerCase();
  assert(
    mimeType.startsWith('image/'),
    `The url ${url} is not a image, because of content-type in header is ${contentType}.`,
  );

  const buffer = Buffer.from(await response.arrayBuffer());
  return `data:${mimeType};base64,${buffer.toString('base64')}`;
};
231+
232+
/**
233+
* Convert image file to base64 string
234+
* Because this method is synchronous, the npm package `sharp` cannot be used to detect the file type.
235+
* TODO: convert to webp to reduce base64 size.
236+
*/
237+
export const localImg2Base64 = (
238+
imgPath: string,
239+
withoutHeader = false,
240+
): string => {
241+
const body = readFileSync(imgPath).toString('base64');
242+
if (withoutHeader) {
243+
return body;
244+
}
245+
246+
// Detect image type by extname.
247+
const type = path.extname(imgPath).slice(1);
248+
const finalType = type === 'svg' ? 'svg+xml' : type || 'jpg';
249+
250+
return `data:image/${finalType};base64,${body}`;
251+
};

0 commit comments

Comments
 (0)