You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: src/schemas.ts
+65 -7 Lines changed: 65 additions & 7 deletions
Original file line number
Diff line number
Diff line change
@@ -68,13 +68,23 @@ export type GetScriptingTipsInput = z.infer<typeof GetScriptingTipsInputSchema>;
68
68
// AX Query Input Schema
69
69
exportconstAXQueryInputSchema=z.object({
70
70
command: z.enum(['query','perform']).describe('The operation to perform. (Formerly cmd)'),
71
+
72
+
// Fields for lenient parsing if locator is flattened
73
+
app: z.string().optional().describe('Top-level app name (used if locator is a string and app is not specified within a locator object)'),
74
+
role: z.string().optional().describe('Top-level role (used if locator is a string/flattened)'),
75
+
match: z.record(z.string()).optional().describe('Top-level match (used if locator is a string/flattened)'),
76
+
77
+
locator: z.union([
78
+
z.object({
79
+
app: z.string().describe('Bundle ID or display name of the application to query'),
80
+
role: z.string().describe('Accessibility role to match, e.g., "AXButton", "AXStaticText"'),
81
+
match: z.record(z.string()).describe('Attributes to match for the element'),
82
+
navigation_path_hint: z.array(z.string()).optional().describe('Optional path to navigate within the application hierarchy, e.g., ["window[1]", "toolbar[1]"]. (Formerly pathHint)'),
83
+
}),
84
+
z.string().describe('Bundle ID or display name of the application to query (used if role/match are provided at top level and this string serves as the app name)')
85
+
]).describe('Specifications to find the target element(s). Can be a full locator object or just an app name string (if role/match are top-level).'),
86
+
71
87
return_all_matches: z.boolean().optional().describe('When true, returns all matching elements rather than just the first match. Default is false. (Formerly multi)'),
72
-
locator: z.object({
73
-
app: z.string().describe('Bundle ID or display name of the application to query'),
74
-
role: z.string().describe('Accessibility role to match, e.g., "AXButton", "AXStaticText"'),
75
-
match: z.record(z.string()).describe('Attributes to match for the element'),
76
-
navigation_path_hint: z.array(z.string()).optional().describe('Optional path to navigate within the application hierarchy, e.g., ["window[1]", "toolbar[1]"]. (Formerly pathHint)'),
77
-
}),
78
88
attributes_to_query: z.array(z.string()).optional().describe('Attributes to query for matched elements. If not provided, common attributes will be included. (Formerly attributes)'),
79
89
required_action_name: z.string().optional().describe('Filter elements to only those supporting this action, e.g., "AXPress". (Formerly requireAction)'),
80
90
action_to_perform: z.string().optional().describe('Only used with command: "perform" - The action to perform on the matched element. (Formerly action)'),
'For return_all_matches: true queries, specifies the maximum number of UI elements to fully process and return. If omitted, a default (e.g., 200) is used internally by the ax binary. Helps control performance for very large result sets.'
"'verbose': Includes all attributes, even empty/placeholders. Key-value pairs. Useful for debugging. \n"+
107
+
"'text_content': Returns only concatenated text values of common textual attributes (e.g., AXValue, AXTitle, AXDescription). No keys. Ideal for fast text extraction."
@@ -445,6 +455,15 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
445
455
446
456
* \`limit\` (integer, optional): Maximum number of lines to return in the output. Defaults to 500. Output will be truncated if it exceeds this limit.
447
457
458
+
* \`max_elements\` (integer, optional): For \`return_all_matches: true\` queries, this specifies the maximum number of UI elements the \`ax\` binary will fully process and return attributes for. If omitted, an internal default (e.g., 200) is used. This helps manage performance when querying UIs with a very large number of matching elements (like numerous text fields on a complex web page). This is different from \`limit\`, which truncates the final text output based on lines.
459
+
460
+
* \`debug_logging\` (boolean, optional): If true, enables detailed debug logging from the underlying \`ax\` binary. This diagnostic information will be included in the response, which can be helpful for troubleshooting complex queries or unexpected behavior. Defaults to false.
461
+
462
+
* \`output_format\` (enum: 'smart' | 'verbose' | 'text_content', optional, default: 'smart'): Controls the format and verbosity of the attribute output from the \`ax\` binary.
463
+
* \`'smart'\`: (Default) Optimized for readability. Omits attributes with empty or placeholder values. Returns key-value pairs.
464
+
* \`'verbose'\`: Maximum detail. Includes all attributes, even empty/placeholders. Key-value pairs. Best for debugging element properties.
465
+
* \`'text_content'\`: Highly compact for text extraction. Returns only concatenated text values of common textual attributes (e.g., AXValue, AXTitle). No keys are returned. Ideal for quickly getting all text from elements; the \`attributes_to_query\` parameter is ignored in this mode.
466
+
448
467
**Example Queries (Note: key names have changed to snake_case):**
449
468
450
469
1. **Find all text elements in the front Safari window:**
@@ -490,17 +509,48 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
490
509
**Note:** Using this tool requires that the application running this server has the necessary Accessibility permissions in macOS System Settings > Privacy & Security > Accessibility.`,
491
510
AXQueryInputShape,
492
511
async(args: unknown)=>{
493
-
let input: AXQueryInput;// Declare input here to make it accessible in catch
512
+
let inputFromZod: AXQueryInput;
494
513
try{
495
-
input=AXQueryInputSchema.parse(args);
496
-
logger.info('accessibility_query called with input:',input);
514
+
inputFromZod=AXQueryInputSchema.parse(args);
515
+
logger.info('accessibility_query called with raw Zod-parsed input:',inputFromZod);
516
+
517
+
// Normalize the input to the canonical structure AXQueryExecutor expects
// Zod superRefine should have already ensured inputFromZod.role is defined.
523
+
// The top-level inputFromZod.app is ignored here because inputFromZod.locator (the string) is the app.
524
+
canonicalInput={
525
+
// Spread all other fields from inputFromZod first
526
+
...inputFromZod,
527
+
// Then explicitly define the locator object
528
+
locator: {
529
+
app: inputFromZod.locator,// The string locator is the app name
530
+
role: inputFromZod.role!,// Role from top level (assert non-null due to Zod refine)
531
+
match: inputFromZod.match||{},// Match from top level, or default to empty
532
+
navigation_path_hint: undefined// No path hint in this malformed case typically
533
+
},
534
+
// Nullify the top-level fields that are now part of the canonical locator
535
+
// to avoid confusion if they were passed, though AXQueryExecutor won't use them.
536
+
app: undefined,
537
+
role: undefined,
538
+
match: undefined
539
+
};
540
+
}else{
541
+
// Well-formed case: locator is an object. Zod superRefine ensures top-level app/role/match are undefined.
542
+
logger.debug('Input is well-formed (locator is object).');
543
+
canonicalInput=inputFromZod;
544
+
}
545
+
546
+
// logger.info('accessibility_query using canonical input for executor:', JSON.parse(JSON.stringify(canonicalInput))); // Commented out due to persistent linter issue
0 commit comments