
feat(core): support config VQA related model #1002

Open · wants to merge 3 commits into base: main
packages/core/src/ai-model/common.ts (7 additions, 2 deletions)

```diff
@@ -24,7 +24,7 @@ import {
 
 import type { PlanningLocateParam } from '@/types';
 import { NodeType } from '@midscene/shared/constants';
-import { vlLocateMode } from '@midscene/shared/env';
+import { type IModelPreferences, vlLocateMode } from '@midscene/shared/env';
 import { treeToList } from '@midscene/shared/extractor';
 import { compositeElementInfoImg } from '@midscene/shared/img';
 import { getDebug } from '@midscene/shared/logger';
@@ -45,8 +45,13 @@ export enum AIActionType {
 export async function callAiFn<T>(
   msgs: AIArgs,
   AIActionTypeValue: AIActionType,
+  modelPreferences?: IModelPreferences,
 ): Promise<{ content: T; usage?: AIUsageInfo }> {
-  const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);
+  const jsonObject = await callToGetJSONObject<T>(
+    msgs,
+    AIActionTypeValue,
+    modelPreferences,
+  );
 
   return {
     content: jsonObject.content,
```
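
The effect of this change: `callAiFn` now threads an optional `IModelPreferences` through to `callToGetJSONObject`, so a single call (for example, a VQA-style extraction) can be routed to a different model while existing call sites stay source-compatible. A minimal call-site sketch, assuming `AIArgs` and the enum are exported from `common.ts`; the actual fields of `IModelPreferences` (defined in `@midscene/shared/env`) are not shown in this diff:

```ts
import type { IModelPreferences } from '@midscene/shared/env';
// Relative path and exports assumed for illustration; the diff only
// shows that common.ts defines callAiFn and the AIActionType enum.
import { AIActionType, type AIArgs, callAiFn } from './common';

// Forward caller-supplied preferences; the parameter is optional, so
// callers that pass nothing keep today's default model selection.
async function extractJson<T>(
  msgs: AIArgs,
  preferences?: IModelPreferences,
): Promise<T> {
  const { content, usage } = await callAiFn<T>(
    msgs,
    AIActionType.EXTRACT_DATA,
    preferences,
  );
  if (usage) console.debug('token usage', usage); // usage is optional per the diff
  return content;
}
```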
packages/core/src/ai-model/inspect.ts (15 additions, 6 deletions)

```diff
@@ -15,11 +15,12 @@ import type {
   UIContext,
 } from '@/types';
 import {
-  MIDSCENE_USE_QWEN_VL,
-  MIDSCENE_USE_VLM_UI_TARS,
-  getAIConfigInBoolean,
+  type IModelPreferences,
+  getIsUseQwenVl,
+  getIsUseVlmUiTars,
   vlLocateMode,
 } from '@midscene/shared/env';
+
 import {
   cropByRect,
   paddingToMatchBlockByBase64,
@@ -364,7 +365,7 @@ export async function AiLocateSection(options: {
     imageBase64 = await cropByRect(
       screenshotBase64,
       sectionRect,
-      getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),
+      getIsUseQwenVl(),
     );
   }
 
@@ -385,8 +386,15 @@ export async function AiExtractElementInfo<
   multimodalPrompt?: TMultimodalPrompt;
   context: UIContext<ElementType>;
   extractOption?: InsightExtractOption;
+  modelPreferences?: IModelPreferences;
 }) {
-  const { dataQuery, context, extractOption, multimodalPrompt } = options;
+  const {
+    dataQuery,
+    context,
+    extractOption,
+    multimodalPrompt,
+    modelPreferences,
+  } = options;
   const systemPrompt = systemPromptToExtract();
 
   const { screenshotBase64 } = context;
@@ -445,6 +453,7 @@
   const result = await callAiFn<AIDataExtractionResponse<T>>(
     msgs,
     AIActionType.EXTRACT_DATA,
+    modelPreferences,
   );
   return {
     parseResult: result.content,
@@ -463,7 +472,7 @@ export async function AiAssert<
   const { screenshotBase64 } = context;
 
   const systemPrompt = systemPromptToAssert({
-    isUITars: getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS),
+    isUITars: getIsUseVlmUiTars(),
   });
 
   const assertionText = extraTextFromUserPrompt(assertion);
```
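
Besides replacing the raw flag reads (`getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL)`, `getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS)`) with the dedicated `getIsUseQwenVl()` and `getIsUseVlmUiTars()` helpers, the substantive change here is that `AiExtractElementInfo` accepts `modelPreferences` and forwards it to `callAiFn`. A hedged call-site sketch; the `dataQuery` string is illustrative and the preference value is left abstract because the fields of `IModelPreferences` are not visible in this diff:

```ts
import type { UIContext } from '@/types';
import type { IModelPreferences } from '@midscene/shared/env';
import { AiExtractElementInfo } from './inspect'; // relative path assumed

declare const context: UIContext<any>;        // screenshot + element tree, from the caller
declare const preferences: IModelPreferences; // fields not shown in this diff

// modelPreferences flows AiExtractElementInfo -> callAiFn -> callToGetJSONObject,
// so this one extraction can target a VQA-capable model.
const { parseResult } = await AiExtractElementInfo<{ title: string }>({
  dataQuery: 'the page title, as { title: string }', // illustrative query
  context,
  modelPreferences: preferences,
});
console.log(parseResult);
```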
packages/core/src/ai-model/prompt/playwright-generator.ts (1 addition, 1 deletion)

```diff
@@ -206,7 +206,7 @@ ${PLAYWRIGHT_EXAMPLE_CODE}`;
 
   if (options.stream && options.onChunk) {
     // Use streaming
-    return await callAi(prompt, AIActionType.EXTRACT_DATA, undefined, {
+    return await callAi(prompt, AIActionType.EXTRACT_DATA, {
      stream: true,
      onChunk: options.onChunk,
    });
```
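
The yaml-generator change below is the same one-liner: the explicit `undefined` placeholder is dropped, so the streaming options object moves up into the third parameter slot of `callAi`. The updated `callAi` signature is not part of this diff; a plausible reading, stated purely as an assumption:

```ts
// Assumed post-change shape of callAi (not shown in this diff): the old
// third parameter is removed, so stream options now sit third.
declare function callAi(
  prompt: string,
  actionType: AIActionType,
  options?: {
    stream?: boolean;
    onChunk?: (chunk: string) => void; // chunk type assumed
  },
): Promise<string>; // return type assumed
```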
packages/core/src/ai-model/prompt/yaml-generator.ts (1 addition, 1 deletion)

```diff
@@ -425,7 +425,7 @@ Respond with YAML only, no explanations.`,
 
   if (options.stream && options.onChunk) {
     // Use streaming
-    return await callAi(prompt, AIActionType.EXTRACT_DATA, undefined, {
+    return await callAi(prompt, AIActionType.EXTRACT_DATA, {
      stream: true,
      onChunk: options.onChunk,
    });
```