Skip to content

Commit ca5cf7c

Browse files
authored
feat(recorder): add aiScroll function and enhance YAML task definitions (#991)
1 parent 826db3e commit ca5cf7c

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed

packages/shared/src/constants/example-code.ts

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ IMPORTANT: Follow these exact type signatures for AI functions:
77
// Type signatures for AI functions:
88
aiInput(value: string, locator: string): Promise<void>
99
aiTap(locator: string): Promise<void>
10+
aiScroll(scrollParam: {
11+
direction: 'up' | 'down' | 'left' | 'right',
12+
scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft',
13+
distance?: number // Optional, the scroll distance in pixels
14+
}): Promise<void>
1015
aiAssert(assertion: string): Promise<void>
1116
aiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions
1217
@@ -97,4 +102,94 @@ tasks:
97102
- Use natural language descriptions
98103
- Add deepThink: true for complex interactions
99104
- Keep task names concise but descriptive
105+
106+
107+
108+
YAML schema for task definitions:
109+
tasks:
110+
- name: <name>
111+
continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.
112+
flow:
113+
# Auto Planning (.ai)
114+
# ----------------
115+
116+
# Perform an interaction. \`ai\` is a shorthand for \`aiAction\`.
117+
- ai: <prompt>
118+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
119+
120+
# This usage is the same as \`ai\`.
121+
- aiAction: <prompt>
122+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
123+
124+
# Instant Action (.aiTap, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)
125+
# ----------------
126+
127+
# Tap an element described by a prompt.
128+
- aiTap: <prompt>
129+
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
130+
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
131+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
132+
133+
# Hover over an element described by a prompt.
134+
- aiHover: <prompt>
135+
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
136+
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
137+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
138+
139+
# Input text into an element described by a prompt.
140+
- aiInput: <final text content of the input>
141+
locate: <prompt>
142+
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
143+
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
144+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
145+
146+
# Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.
147+
- aiKeyboardPress: <key>
148+
locate: <prompt>
149+
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
150+
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
151+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
152+
153+
# Scroll globally or on an element described by a prompt.
154+
- aiScroll:
155+
direction: 'up' # or 'down' | 'left' | 'right'
156+
scrollType: 'once' # or 'untilTop' | 'untilBottom' | 'untilLeft' | 'untilRight'
157+
distance: <number> # Optional, the scroll distance in pixels.
158+
locate: <prompt> # Optional, the element to scroll on.
159+
deepThink: <boolean> # Optional, whether to use deepThink to precisely locate the element. Defaults to False.
160+
xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.
161+
cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.
162+
163+
# Log the current screenshot with a description in the report file.
164+
- logScreenshot: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.
165+
content: <content> # Optional, the description of the screenshot.
166+
167+
# Data Extraction
168+
# ----------------
169+
170+
# Perform a query that returns a JSON object.
171+
- aiQuery: <prompt> # Remember to describe the format of the result in the prompt.
172+
name: <name> # The key for the query result in the JSON output.
173+
174+
# More APIs
175+
# ----------------
176+
177+
# Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).
178+
- aiWaitFor: <prompt>
179+
timeout: <ms>
180+
181+
# Perform an assertion.
182+
- aiAssert: <prompt>
183+
errorMessage: <error-message> # Optional, the error message to print if the assertion fails.
184+
185+
# Wait for a specified amount of time.
186+
- sleep: <ms>
187+
188+
# Execute a piece of JavaScript code in the web page context.
189+
- javascript: <javascript>
190+
name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.
191+
192+
- name: <name>
193+
flow:
194+
# ...
100195
`;

0 commit comments

Comments
 (0)