Skip to content

Commit bd0d328

Browse files
committed
Add options and make parsing more lenient
1 parent 21695e6 commit bd0d328

File tree

2 files changed

+146
-23
lines changed

2 files changed

+146
-23
lines changed

src/schemas.ts

Lines changed: 65 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,23 @@ export type GetScriptingTipsInput = z.infer<typeof GetScriptingTipsInputSchema>;
6868
// AX Query Input Schema
6969
export const AXQueryInputSchema = z.object({
7070
command: z.enum(['query', 'perform']).describe('The operation to perform. (Formerly cmd)'),
71+
72+
// Fields for lenient parsing if locator is flattened
73+
app: z.string().optional().describe('Top-level app name (used if locator is a string and app is not specified within a locator object)'),
74+
role: z.string().optional().describe('Top-level role (used if locator is a string/flattened)'),
75+
match: z.record(z.string()).optional().describe('Top-level match (used if locator is a string/flattened)'),
76+
77+
locator: z.union([
78+
z.object({
79+
app: z.string().describe('Bundle ID or display name of the application to query'),
80+
role: z.string().describe('Accessibility role to match, e.g., "AXButton", "AXStaticText"'),
81+
match: z.record(z.string()).describe('Attributes to match for the element'),
82+
navigation_path_hint: z.array(z.string()).optional().describe('Optional path to navigate within the application hierarchy, e.g., ["window[1]", "toolbar[1]"]. (Formerly pathHint)'),
83+
}),
84+
z.string().describe('Bundle ID or display name of the application to query (used if role/match are provided at top level and this string serves as the app name)')
85+
]).describe('Specifications to find the target element(s). Can be a full locator object or just an app name string (if role/match are top-level).'),
86+
7187
return_all_matches: z.boolean().optional().describe('When true, returns all matching elements rather than just the first match. Default is false. (Formerly multi)'),
72-
locator: z.object({
73-
app: z.string().describe('Bundle ID or display name of the application to query'),
74-
role: z.string().describe('Accessibility role to match, e.g., "AXButton", "AXStaticText"'),
75-
match: z.record(z.string()).describe('Attributes to match for the element'),
76-
navigation_path_hint: z.array(z.string()).optional().describe('Optional path to navigate within the application hierarchy, e.g., ["window[1]", "toolbar[1]"]. (Formerly pathHint)'),
77-
}),
7888
attributes_to_query: z.array(z.string()).optional().describe('Attributes to query for matched elements. If not provided, common attributes will be included. (Formerly attributes)'),
7989
required_action_name: z.string().optional().describe('Filter elements to only those supporting this action, e.g., "AXPress". (Formerly requireAction)'),
8090
action_to_perform: z.string().optional().describe('Only used with command: "perform" - The action to perform on the matched element. (Formerly action)'),
@@ -83,6 +93,18 @@ export const AXQueryInputSchema = z.object({
8393
),
8494
limit: z.number().int().positive().optional().default(500).describe(
8595
'Maximum number of lines to return in the output. Defaults to 500. Output will be truncated if it exceeds this limit.'
96+
),
97+
max_elements: z.number().int().positive().optional().describe(
98+
'For return_all_matches: true queries, specifies the maximum number of UI elements to fully process and return. If omitted, a default (e.g., 200) is used internally by the ax binary. Helps control performance for very large result sets.'
99+
),
100+
debug_logging: z.boolean().optional().default(false).describe(
101+
'If true, enables detailed debug logging from the ax binary, which will be returned as part of the response. Defaults to false.'
102+
),
103+
output_format: z.enum(['smart', 'verbose', 'text_content']).optional().default('smart').describe(
104+
"Controls the format and verbosity of the attribute output. \n" +
105+
"'smart': (Default) Omits empty/placeholder values. Key-value pairs. \n" +
106+
"'verbose': Includes all attributes, even empty/placeholders. Key-value pairs. Useful for debugging. \n" +
107+
"'text_content': Returns only concatenated text values of common textual attributes (e.g., AXValue, AXTitle, AXDescription). No keys. Ideal for fast text extraction."
86108
)
87109
}).refine(
88110
(data) => {
@@ -93,7 +115,43 @@ export const AXQueryInputSchema = z.object({
93115
message: "When command is 'perform', an action_to_perform must be provided",
94116
path: ["action_to_perform"],
95117
}
96-
);
118+
).superRefine((data, ctx) => {
119+
if (typeof data.locator === 'string') { // Case 1: locator is a string (app name)
120+
if (data.role === undefined) {
121+
ctx.addIssue({
122+
code: z.ZodIssueCode.custom,
123+
message: "If 'locator' is a string (app name), top-level 'role' must be provided.",
124+
path: ['role'], // Path refers to the top-level role
125+
});
126+
}
127+
// data.match will default to {} if undefined later in the handler
128+
// data.app (top-level) is ignored if data.locator (string) is present, as the locator string *is* the app name.
129+
} else { // Case 2: locator is an object
130+
// Ensure top-level app, role, match are not present if locator is a full object, to avoid ambiguity.
131+
// This is a stricter interpretation. Alternatively, we could prioritize the locator object's fields.
132+
if (data.app !== undefined) {
133+
ctx.addIssue({
134+
code: z.ZodIssueCode.custom,
135+
message: "Top-level 'app' should not be provided if 'locator' is a detailed object. Define 'app' inside the 'locator' object.",
136+
path: ['app'],
137+
});
138+
}
139+
if (data.role !== undefined) {
140+
ctx.addIssue({
141+
code: z.ZodIssueCode.custom,
142+
message: "Top-level 'role' should not be provided if 'locator' is a detailed object. Define 'role' inside the 'locator' object.",
143+
path: ['role'],
144+
});
145+
}
146+
if (data.match !== undefined) {
147+
ctx.addIssue({
148+
code: z.ZodIssueCode.custom,
149+
message: "Top-level 'match' should not be provided if 'locator' is a detailed object. Define 'match' inside the 'locator' object.",
150+
path: ['match'],
151+
});
152+
}
153+
}
154+
});
97155

98156
export type AXQueryInput = z.infer<typeof AXQueryInputSchema>;
99157

src/server.ts

Lines changed: 81 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -75,18 +75,28 @@ const GetScriptingTipsInputShape = {
7575

7676
const AXQueryInputShape = {
7777
command: z.enum(['query', 'perform']),
78+
// Top-level fields for lenient parsing
79+
app: z.string().optional(),
80+
role: z.string().optional(),
81+
match: z.record(z.string()).optional(),
82+
83+
locator: z.union([
84+
z.object({
85+
app: z.string(),
86+
role: z.string(),
87+
match: z.record(z.string()),
88+
navigation_path_hint: z.array(z.string()).optional(),
89+
}),
90+
z.string()
91+
]),
7892
return_all_matches: z.boolean().optional(),
79-
locator: z.object({
80-
app: z.string(),
81-
role: z.string(),
82-
match: z.record(z.string()),
83-
navigation_path_hint: z.array(z.string()).optional(),
84-
}),
8593
attributes_to_query: z.array(z.string()).optional(),
8694
required_action_name: z.string().optional(),
8795
action_to_perform: z.string().optional(),
8896
report_execution_time: z.boolean().optional().default(false),
8997
limit: z.number().int().positive().optional().default(500),
98+
debug_logging: z.boolean().optional().default(false),
99+
output_format: z.enum(['smart', 'verbose', 'text_content']).optional().default('smart'),
90100
} as const;
91101

92102
async function main() {
@@ -445,6 +455,15 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
445455
446456
* \`limit\` (integer, optional): Maximum number of lines to return in the output. Defaults to 500. Output will be truncated if it exceeds this limit.
447457
458+
* \`max_elements\` (integer, optional): For \`return_all_matches: true\` queries, this specifies the maximum number of UI elements the \`ax\` binary will fully process and return attributes for. If omitted, an internal default (e.g., 200) is used. This helps manage performance when querying UIs with a very large number of matching elements (like numerous text fields on a complex web page). This is different from \`limit\`, which truncates the final text output based on lines.
459+
460+
* \`debug_logging\` (boolean, optional): If true, enables detailed debug logging from the underlying \`ax\` binary. This diagnostic information will be included in the response, which can be helpful for troubleshooting complex queries or unexpected behavior. Defaults to false.
461+
462+
* \`output_format\` (enum: 'smart' | 'verbose' | 'text_content', optional, default: 'smart'): Controls the format and verbosity of the attribute output from the \`ax\` binary.
463+
* \`'smart'\`: (Default) Optimized for readability. Omits attributes with empty or placeholder values. Returns key-value pairs.
464+
* \`'verbose'\`: Maximum detail. Includes all attributes, even empty/placeholders. Key-value pairs. Best for debugging element properties.
465+
* \`'text_content'\`: Highly compact for text extraction. Returns only concatenated text values of common textual attributes (e.g., AXValue, AXTitle). No keys are returned. Ideal for quickly getting all text from elements; the \`attributes_to_query\` parameter is ignored in this mode.
466+
448467
**Example Queries (Note: key names have changed to snake_case):**
449468
450469
1. **Find all text elements in the front Safari window:**
@@ -490,17 +509,48 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
490509
**Note:** Using this tool requires that the application running this server has the necessary Accessibility permissions in macOS System Settings > Privacy & Security > Accessibility.`,
491510
AXQueryInputShape,
492511
async (args: unknown) => {
493-
let input: AXQueryInput; // Declare input here to make it accessible in catch
512+
let inputFromZod: AXQueryInput;
494513
try {
495-
input = AXQueryInputSchema.parse(args);
496-
logger.info('accessibility_query called with input:', input);
514+
inputFromZod = AXQueryInputSchema.parse(args);
515+
logger.info('accessibility_query called with raw Zod-parsed input:', inputFromZod);
516+
517+
// Normalize the input to the canonical structure AXQueryExecutor expects
518+
let canonicalInput: AXQueryInput;
519+
520+
if (typeof inputFromZod.locator === 'string') {
521+
logger.debug('Normalizing malformed input (locator is string). Top-level data:', { appLocatorString: inputFromZod.locator, role: inputFromZod.role, match: inputFromZod.match });
522+
// Zod superRefine should have already ensured inputFromZod.role is defined.
523+
// The top-level inputFromZod.app is ignored here because inputFromZod.locator (the string) is the app.
524+
canonicalInput = {
525+
// Spread all other fields from inputFromZod first
526+
...inputFromZod,
527+
// Then explicitly define the locator object
528+
locator: {
529+
app: inputFromZod.locator, // The string locator is the app name
530+
role: inputFromZod.role!, // Role from top level (assert non-null due to Zod refine)
531+
match: inputFromZod.match || {}, // Match from top level, or default to empty
532+
navigation_path_hint: undefined // No path hint in this malformed case typically
533+
},
534+
// Nullify the top-level fields that are now part of the canonical locator
535+
// to avoid confusion if they were passed, though AXQueryExecutor won't use them.
536+
app: undefined,
537+
role: undefined,
538+
match: undefined
539+
};
540+
} else {
541+
// Well-formed case: locator is an object. Zod superRefine ensures top-level app/role/match are undefined.
542+
logger.debug('Input is well-formed (locator is object).');
543+
canonicalInput = inputFromZod;
544+
}
545+
546+
// logger.info('accessibility_query using canonical input for executor:', JSON.parse(JSON.stringify(canonicalInput))); // Commented out due to persistent linter issue
497547

498-
const result = await axQueryExecutor.execute(input);
548+
const result = await axQueryExecutor.execute(canonicalInput);
499549

500550
// For cleaner output, especially for multi-element queries, format the response
501551
let formattedOutput: string;
502552

503-
if (input.command === 'query' && input.return_all_matches === true) {
553+
if (inputFromZod.command === 'query' && inputFromZod.return_all_matches === true) {
504554
// For multi-element queries, format the results more readably
505555
if ('elements' in result) {
506556
formattedOutput = JSON.stringify(result, null, 2);
@@ -515,15 +565,22 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
515565
// Apply line limit
516566
let finalOutputText = formattedOutput;
517567
const lines = finalOutputText.split('\n');
518-
if (input.limit !== undefined && lines.length > input.limit) {
519-
finalOutputText = lines.slice(0, input.limit).join('\n');
520-
const truncationNotice = `\n\n--- Output truncated to ${input.limit} lines. Original length was ${lines.length} lines. ---`;
568+
if (inputFromZod.limit !== undefined && lines.length > inputFromZod.limit) {
569+
finalOutputText = lines.slice(0, inputFromZod.limit).join('\n');
570+
const truncationNotice = `\n\n--- Output truncated to ${inputFromZod.limit} lines. Original length was ${lines.length} lines. ---`;
521571
finalOutputText += truncationNotice;
522572
}
523573

524574
const responseContent: Array<{ type: 'text'; text: string }> = [{ type: 'text', text: finalOutputText }];
525575

526-
if (input.report_execution_time) {
576+
// Add debug logs if they exist in the result
577+
if (result.debug_logs && Array.isArray(result.debug_logs) && result.debug_logs.length > 0) {
578+
const debugHeader = "\n\n--- AX Binary Debug Logs ---";
579+
const logsString = result.debug_logs.join('\n');
580+
responseContent.push({ type: 'text', text: `${debugHeader}\n${logsString}` });
581+
}
582+
583+
if (inputFromZod.report_execution_time) {
527584
const ms = result.execution_time_seconds * 1000;
528585
let timeMessage = "Script executed in ";
529586
if (ms < 1) { // Less than 1 millisecond
@@ -545,7 +602,15 @@ This tool exposes the complete macOS accessibility API capabilities, allowing de
545602
} catch (error: unknown) {
546603
const err = error as Error;
547604
logger.error('Error in accessibility_query tool handler', { message: err.message });
548-
throw new sdkTypes.McpError(sdkTypes.ErrorCode.InternalError, `Failed to execute accessibility query: ${err.message}`);
605+
// If the error object from AXQueryExecutor contains debug_logs, include them
606+
let errorMessage = `Failed to execute accessibility query: ${err.message}`;
607+
const errorWithLogs = err as (Error & { debug_logs?: string[] }); // Cast here
608+
if (errorWithLogs.debug_logs && Array.isArray(errorWithLogs.debug_logs) && errorWithLogs.debug_logs.length > 0) {
609+
const debugHeader = "\n\n--- AX Binary Debug Logs (from error) ---";
610+
const logsString = errorWithLogs.debug_logs.join('\n');
611+
errorMessage += `\n${debugHeader}\n${logsString}`;
612+
}
613+
throw new sdkTypes.McpError(sdkTypes.ErrorCode.InternalError, errorMessage);
549614
}
550615
}
551616
);

0 commit comments

Comments
 (0)