1
1
import {
2
2
ConversationHistory ,
3
- elementByPositionWithElementInfo ,
4
3
findAllMidsceneLocatorField ,
5
4
uiTarsPlanning ,
6
5
} from '@/ai-model' ;
@@ -10,6 +9,7 @@ import {
10
9
type BaseElement ,
11
10
type DetailedLocateParam ,
12
11
type DumpSubscriber ,
12
+ type ElementCacheFeature ,
13
13
type ExecutionRecorderItem ,
14
14
type ExecutionTaskActionApply ,
15
15
type ExecutionTaskApply ,
@@ -40,7 +40,6 @@ import {
40
40
plan ,
41
41
} from '@/index' ;
42
42
import { sleep } from '@/utils' ;
43
- import { NodeType } from '@midscene/shared/constants' ;
44
43
import {
45
44
type IModelConfig ,
46
45
MIDSCENE_REPLANNING_CYCLE_LIMIT ,
@@ -123,68 +122,6 @@ export class TaskExecutor {
123
122
return item ;
124
123
}
125
124
126
- private async getElementXpath (
127
- uiContext : UIContext < BaseElement > ,
128
- element : LocateResultElement ,
129
- ) : Promise < string [ ] | undefined > {
130
- if ( ! ( this . interface as any ) . getXpathsByPoint ) {
131
- debug ( 'getXpathsByPoint is not supported for this interface' ) ;
132
- return undefined ;
133
- }
134
-
135
- let elementId = element ?. id ;
136
- if ( element ?. isOrderSensitive !== undefined ) {
137
- try {
138
- const xpaths = await ( this . interface as any ) . getXpathsByPoint (
139
- {
140
- left : element . center [ 0 ] ,
141
- top : element . center [ 1 ] ,
142
- } ,
143
- element ?. isOrderSensitive ,
144
- ) ;
145
-
146
- return xpaths ;
147
- } catch ( error ) {
148
- debug ( 'getXpathsByPoint failed: %s' , error ) ;
149
- return undefined ;
150
- }
151
- }
152
-
153
- // find the nearest xpath for the element
154
- if ( element ?. attributes ?. nodeType === NodeType . POSITION ) {
155
- await this . insight . contextRetrieverFn ( 'locate' ) ;
156
- const info = elementByPositionWithElementInfo (
157
- uiContext . tree ,
158
- {
159
- x : element . center [ 0 ] ,
160
- y : element . center [ 1 ] ,
161
- } ,
162
- {
163
- requireStrictDistance : false ,
164
- filterPositionElements : true ,
165
- } ,
166
- ) ;
167
- if ( info ?. id ) {
168
- elementId = info . id ;
169
- } else {
170
- debug (
171
- 'no element id found for position node, will not update cache' ,
172
- element ,
173
- ) ;
174
- }
175
- }
176
-
177
- if ( ! elementId ) {
178
- return undefined ;
179
- }
180
- try {
181
- const result = await ( this . interface as any ) . getXpathsById ( elementId ) ;
182
- return result ;
183
- } catch ( error ) {
184
- debug ( 'getXpathsById error: ' , error ) ;
185
- }
186
- }
187
-
188
125
private prependExecutorWithScreenshot (
189
126
taskApply : ExecutionTaskApply ,
190
127
appendAfterExecution = false ,
@@ -283,12 +220,12 @@ export class TaskExecutor {
283
220
const cachePrompt = param . prompt ;
284
221
const locateCacheRecord =
285
222
this . taskCache ?. matchLocateCache ( cachePrompt ) ;
286
- const xpaths = locateCacheRecord ?. cacheContent ?. xpaths ;
223
+ const cacheEntry = locateCacheRecord ?. cacheContent ?. cache ;
287
224
const elementFromCache = userExpectedPathHitFlag
288
225
? null
289
226
: await matchElementFromCache (
290
227
this ,
291
- xpaths ,
228
+ cacheEntry ,
292
229
cachePrompt ,
293
230
param . cacheable ,
294
231
) ;
@@ -324,38 +261,47 @@ export class TaskExecutor {
324
261
elementFromAiLocate ;
325
262
326
263
// update cache
327
- let currentXpaths : string [ ] | undefined ;
264
+ let currentCacheEntry : ElementCacheFeature | undefined ;
328
265
if (
329
266
element &&
330
267
this . taskCache &&
331
268
! cacheHitFlag &&
332
269
param ?. cacheable !== false
333
270
) {
334
- const elementXpaths = await this . getElementXpath (
335
- uiContext ,
336
- element ,
337
- ) ;
338
- if ( elementXpaths ?. length ) {
339
- debug (
340
- 'update cache, prompt: %s, xpaths: %s' ,
341
- cachePrompt ,
342
- elementXpaths ,
343
- ) ;
344
- currentXpaths = elementXpaths ;
345
- this . taskCache . updateOrAppendCacheRecord (
346
- {
347
- type : 'locate' ,
348
- prompt : cachePrompt ,
349
- xpaths : elementXpaths ,
350
- } ,
351
- locateCacheRecord ,
352
- ) ;
271
+ if ( this . interface . cacheFeatureForRect ) {
272
+ try {
273
+ const feature = await this . interface . cacheFeatureForRect (
274
+ element . rect ,
275
+ element . isOrderSensitive !== undefined
276
+ ? { _orderSensitive : element . isOrderSensitive }
277
+ : undefined ,
278
+ ) ;
279
+ if ( feature && Object . keys ( feature ) . length > 0 ) {
280
+ debug (
281
+ 'update cache, prompt: %s, cache: %o' ,
282
+ cachePrompt ,
283
+ feature ,
284
+ ) ;
285
+ currentCacheEntry = feature ;
286
+ this . taskCache . updateOrAppendCacheRecord (
287
+ {
288
+ type : 'locate' ,
289
+ prompt : cachePrompt ,
290
+ cache : feature ,
291
+ } ,
292
+ locateCacheRecord ,
293
+ ) ;
294
+ } else {
295
+ debug (
296
+ 'no cache data returned, skip cache update, prompt: %s' ,
297
+ cachePrompt ,
298
+ ) ;
299
+ }
300
+ } catch ( error ) {
301
+ debug ( 'cacheFeatureForRect failed: %s' , error ) ;
302
+ }
353
303
} else {
354
- debug (
355
- 'no xpaths found, will not update cache' ,
356
- cachePrompt ,
357
- elementXpaths ,
358
- ) ;
304
+ debug ( 'cacheFeatureForRect is not supported, skip cache update' ) ;
359
305
}
360
306
}
361
307
if ( ! element ) {
@@ -375,8 +321,8 @@ export class TaskExecutor {
375
321
hitBy = {
376
322
from : 'Cache' ,
377
323
context : {
378
- xpathsFromCache : xpaths ,
379
- xpathsToSave : currentXpaths ,
324
+ cacheEntry ,
325
+ cacheToSave : currentCacheEntry ,
380
326
} ,
381
327
} ;
382
328
} else if ( planHitFlag ) {
0 commit comments