@@ -48,7 +48,7 @@ import {
48
48
globalConfigManager ,
49
49
globalModelConfigManager ,
50
50
} from '@midscene/shared/env' ;
51
- import { resizeImgBase64 } from '@midscene/shared/img' ;
51
+ import { imageInfoOfBase64 , resizeImgBase64 } from '@midscene/shared/img' ;
52
52
import { getDebug } from '@midscene/shared/logger' ;
53
53
import { assert } from '@midscene/shared/utils' ;
54
54
// import type { AndroidDeviceInputOpt } from '../device';
@@ -125,10 +125,15 @@ export class Agent<
125
125
private hasWarnedNonVLModel = false ;
126
126
127
127
/**
128
- * Screenshot scale factor for AI model processing
128
+ * Screenshot scale factor derived from actual screenshot dimensions
129
129
*/
130
130
private screenshotScale ?: number ;
131
131
132
+ /**
133
+ * Internal promise to deduplicate screenshot scale computation
134
+ */
135
+ private screenshotScalePromise ?: Promise < number > ;
136
+
132
137
// @deprecated use .interface instead
133
138
get page ( ) {
134
139
return this . interface ;
@@ -150,6 +155,52 @@ export class Agent<
150
155
}
151
156
}
152
157
158
/**
 * Resolve the screenshot scale: the screenshot's pixel width divided by the
 * page width reported in `context.size`.
 *
 * The result is cached on the agent after the first successful computation.
 * Calls that arrive while a computation is still pending await the same
 * promise instead of starting another one. The pending promise is dropped
 * once it settles, so a failed computation is attempted again on the next call.
 *
 * @param context - UI context supplying `size.width` and `screenshotBase64`.
 * @returns The cached scale, or the freshly computed one.
 * @throws Presumably rejects when one of the `assert` checks fails
 *   (assumes the shared `assert` helper throws — confirm).
 */
private async getScreenshotScale(context: UIContext): Promise<number> {
  const cachedScale = this.screenshotScale;
  if (cachedScale !== undefined) {
    return cachedScale;
  }

  // Measures the scale from the context handed to this particular call.
  const measureScale = async (): Promise<number> => {
    const pageWidth = context.size?.width;
    assert(
      pageWidth && pageWidth > 0,
      `Invalid page width when computing screenshot scale: ${ pageWidth } `,
    );

    const imageInfo = await imageInfoOfBase64(context.screenshotBase64);
    const screenshotWidth = imageInfo.width;
    assert(
      Number.isFinite(screenshotWidth) && screenshotWidth > 0,
      `Invalid screenshot width when computing screenshot scale: ${ screenshotWidth } `,
    );

    const computedScale = screenshotWidth / pageWidth;
    assert(
      Number.isFinite(computedScale) && computedScale > 0,
      `Invalid computed screenshot scale: ${ computedScale } `,
    );

    debug(
      `Computed screenshot scale ${ computedScale } from screenshot width ${ screenshotWidth } and page width ${ pageWidth } `,
    );
    return computedScale;
  };

  // Reuse a pending computation when there is one; otherwise start it now.
  let inFlight = this.screenshotScalePromise;
  if (inFlight === undefined) {
    inFlight = measureScale();
    this.screenshotScalePromise = inFlight;
  }

  try {
    const resolvedScale = await inFlight;
    this.screenshotScale = resolvedScale;
    return resolvedScale;
  } finally {
    // Settled either way: forget the promise so a failure does not stick.
    this.screenshotScalePromise = undefined;
  }
}
203
+
153
204
constructor ( interfaceInstance : InterfaceType , opts ?: AgentOpt ) {
154
205
this . interface = interfaceInstance ;
155
206
this . opts = Object . assign (
@@ -171,7 +222,6 @@ export class Agent<
171
222
? new ModelConfigManager ( opts . modelConfig )
172
223
: globalModelConfigManager ;
173
224
174
- this . screenshotScale = opts ?. screenshotScale ;
175
225
this . onTaskStartTip = this . opts . onTaskStartTip ;
176
226
177
227
this . insight = new Insight ( async ( action : InsightAction ) => {
@@ -226,33 +276,22 @@ export class Agent<
226
276
} ) ;
227
277
}
228
278
229
- // Unified screenshot scaling: prioritize screenshotScale, otherwise use DPR
230
- let targetWidth = context . size . width ;
231
- let targetHeight = context . size . height ;
232
- let needResize = false ;
233
-
234
- if ( this . screenshotScale && this . screenshotScale !== 1 ) {
235
- // User-specified scaling ratio
236
- debug ( `Applying user screenshot scale: ${ this . screenshotScale } ` ) ;
237
- targetWidth = Math . round ( context . size . width * this . screenshotScale ) ;
238
- targetHeight = Math . round ( context . size . height * this . screenshotScale ) ;
239
- needResize = true ;
240
- } else if ( context . size . dpr && context . size . dpr !== 1 ) {
241
- // No user-specified scaling, use DPR scaling to logical size
279
+ const computedScreenshotScale = await this . getScreenshotScale ( context ) ;
280
+
281
+ if ( computedScreenshotScale !== 1 ) {
282
+ const scaleForLog = Number . parseFloat ( computedScreenshotScale . toFixed ( 4 ) ) ;
242
283
debug (
243
- `Applying DPR scaling : ${ context . size . dpr } (resize to logical size)` ,
284
+ `Applying computed screenshot scale : ${ scaleForLog } (resize to logical size)` ,
244
285
) ;
245
- // Target is logical size, no need to change targetWidth/targetHeight
246
- needResize = true ;
247
- }
248
-
249
- // Execute scaling
250
- if ( needResize ) {
286
+ const targetWidth = Math . round ( context . size . width ) ;
287
+ const targetHeight = Math . round ( context . size . height ) ;
251
288
debug ( `Resizing screenshot to ${ targetWidth } x${ targetHeight } ` ) ;
252
289
context . screenshotBase64 = await resizeImgBase64 (
253
290
context . screenshotBase64 ,
254
291
{ width : targetWidth , height : targetHeight } ,
255
292
) ;
293
+ } else {
294
+ debug ( `screenshot scale=${ computedScreenshotScale } ` ) ;
256
295
}
257
296
258
297
return context ;
@@ -856,12 +895,18 @@ export class Agent<
856
895
857
896
const { element } = output ;
858
897
898
+ const dprValue = await ( this . interface . size ( ) as any ) . dpr ;
899
+ const dprEntry = dprValue
900
+ ? {
901
+ dpr : dprValue ,
902
+ }
903
+ : { } ;
859
904
return {
860
905
rect : element ?. rect ,
861
906
center : element ?. center ,
862
- dpr : ( await this . interface . size ( ) ) . dpr ,
907
+ ... dprEntry ,
863
908
} as Pick < LocateResultElement , 'rect' | 'center' > & {
864
- dpr : number ;
909
+ dpr ? : number ; // this field is deprecated
865
910
} ;
866
911
}
867
912
0 commit comments