@@ -48,7 +48,7 @@ import {
48
48
globalConfigManager ,
49
49
globalModelConfigManager ,
50
50
} from '@midscene/shared/env' ;
51
- import { resizeImgBase64 } from '@midscene/shared/img' ;
51
+ import { imageInfoOfBase64 , resizeImgBase64 } from '@midscene/shared/img' ;
52
52
import { getDebug } from '@midscene/shared/logger' ;
53
53
import { assert } from '@midscene/shared/utils' ;
54
54
// import type { AndroidDeviceInputOpt } from '../device';
@@ -136,10 +136,15 @@ export class Agent<
136
136
private hasWarnedNonVLModel = false ;
137
137
138
138
/**
139
- * Screenshot scale factor for AI model processing
139
+ * Screenshot scale factor derived from actual screenshot dimensions
140
140
*/
141
141
private screenshotScale ?: number ;
142
142
143
+ /**
144
+ * Internal promise to deduplicate screenshot scale computation
145
+ */
146
+ private screenshotScalePromise ?: Promise < number > ;
147
+
143
148
// @deprecated use .interface instead
144
149
get page ( ) {
145
150
return this . interface ;
@@ -161,6 +166,52 @@ export class Agent<
161
166
}
162
167
}
163
168
169
/**
 * Resolve the screenshot scale: the screenshot's pixel width (as reported by
 * `imageInfoOfBase64`) divided by the page width taken from `context.size`.
 *
 * The value is memoized in `this.screenshotScale` after the first successful
 * computation. While a computation is in flight, its promise is kept in
 * `this.screenshotScalePromise` so that overlapping calls await the same
 * work instead of decoding the screenshot again. The in-flight promise is
 * cleared once it settles, whether it succeeded or failed, so a failed
 * attempt does not block a later retry.
 *
 * @param context - UI context supplying `size.width` and `screenshotBase64`.
 * @returns The cached or freshly computed scale factor.
 * @throws Rejects when one of the `assert` checks fails (assuming the shared
 *   `assert` helper throws on a falsy condition) or when reading the image
 *   info fails.
 */
private async getScreenshotScale(context: UIContext): Promise<number> {
  // Fast path: a previous call already produced a value.
  const memoizedScale = this.screenshotScale;
  if (memoizedScale !== undefined) {
    return memoizedScale;
  }

  // Start the computation only when no other call has one in flight.
  if (!this.screenshotScalePromise) {
    const measure = async (): Promise<number> => {
      const pageWidth = context.size?.width;
      assert(
        pageWidth && pageWidth > 0,
        `Invalid page width when computing screenshot scale: ${ pageWidth } `,
      );

      const imageInfo = await imageInfoOfBase64(context.screenshotBase64);
      const screenshotWidth = imageInfo.width;
      assert(
        Number.isFinite(screenshotWidth) && screenshotWidth > 0,
        `Invalid screenshot width when computing screenshot scale: ${ screenshotWidth } `,
      );

      const computedScale = screenshotWidth / pageWidth;
      assert(
        Number.isFinite(computedScale) && computedScale > 0,
        `Invalid computed screenshot scale: ${ computedScale } `,
      );

      debug(
        `Computed screenshot scale ${ computedScale } from screenshot width ${ screenshotWidth } and page width ${ pageWidth } `,
      );
      return computedScale;
    };
    this.screenshotScalePromise = measure();
  }

  const inFlight = this.screenshotScalePromise;
  try {
    const resolvedScale = await inFlight;
    this.screenshotScale = resolvedScale;
    return resolvedScale;
  } finally {
    // Drop the shared promise on both success and failure.
    this.screenshotScalePromise = undefined;
  }
}
214
+
164
215
constructor ( interfaceInstance : InterfaceType , opts ?: AgentOpt ) {
165
216
this . interface = interfaceInstance ;
166
217
this . opts = Object . assign (
@@ -182,7 +233,6 @@ export class Agent<
182
233
? new ModelConfigManager ( opts . modelConfig )
183
234
: globalModelConfigManager ;
184
235
185
- this . screenshotScale = opts ?. screenshotScale ;
186
236
this . onTaskStartTip = this . opts . onTaskStartTip ;
187
237
188
238
this . insight = new Insight ( async ( action : InsightAction ) => {
@@ -237,33 +287,22 @@ export class Agent<
237
287
} ) ;
238
288
}
239
289
240
- // Unified screenshot scaling: prioritize screenshotScale, otherwise use DPR
241
- let targetWidth = context . size . width ;
242
- let targetHeight = context . size . height ;
243
- let needResize = false ;
244
-
245
- if ( this . screenshotScale && this . screenshotScale !== 1 ) {
246
- // User-specified scaling ratio
247
- debug ( `Applying user screenshot scale: ${ this . screenshotScale } ` ) ;
248
- targetWidth = Math . round ( context . size . width * this . screenshotScale ) ;
249
- targetHeight = Math . round ( context . size . height * this . screenshotScale ) ;
250
- needResize = true ;
251
- } else if ( context . size . dpr && context . size . dpr !== 1 ) {
252
- // No user-specified scaling, use DPR scaling to logical size
290
+ const computedScreenshotScale = await this . getScreenshotScale ( context ) ;
291
+
292
+ if ( computedScreenshotScale !== 1 ) {
293
+ const scaleForLog = Number . parseFloat ( computedScreenshotScale . toFixed ( 4 ) ) ;
253
294
debug (
254
- `Applying DPR scaling : ${ context . size . dpr } (resize to logical size)` ,
295
+ `Applying computed screenshot scale : ${ scaleForLog } (resize to logical size)` ,
255
296
) ;
256
- // Target is logical size, no need to change targetWidth/targetHeight
257
- needResize = true ;
258
- }
259
-
260
- // Execute scaling
261
- if ( needResize ) {
297
+ const targetWidth = Math . round ( context . size . width ) ;
298
+ const targetHeight = Math . round ( context . size . height ) ;
262
299
debug ( `Resizing screenshot to ${ targetWidth } x${ targetHeight } ` ) ;
263
300
context . screenshotBase64 = await resizeImgBase64 (
264
301
context . screenshotBase64 ,
265
302
{ width : targetWidth , height : targetHeight } ,
266
303
) ;
304
+ } else {
305
+ debug ( `screenshot scale=${ computedScreenshotScale } ` ) ;
267
306
}
268
307
269
308
return context ;
@@ -867,12 +906,18 @@ export class Agent<
867
906
868
907
const { element } = output ;
869
908
909
+ const dprValue = await ( this . interface . size ( ) as any ) . dpr ;
910
+ const dprEntry = dprValue
911
+ ? {
912
+ dpr : dprValue ,
913
+ }
914
+ : { } ;
870
915
return {
871
916
rect : element ?. rect ,
872
917
center : element ?. center ,
873
- dpr : ( await this . interface . size ( ) ) . dpr ,
918
+ ... dprEntry ,
874
919
} as Pick < LocateResultElement , 'rect' | 'center' > & {
875
- dpr : number ;
920
+ dpr ? : number ; // this field is deprecated
876
921
} ;
877
922
}
878
923
0 commit comments