golemcloud
diff --git a/‎golem-skills/tests/harness/src/executor.ts‎
Lines changed: 147 additions & 4 deletions b/‎golem-skills/tests/harness/src/executor.ts‎
Lines changed: 147 additions & 4 deletions
diff --git a/‎golem-skills/tests/harness/src/run.ts‎
Lines changed: 108 additions & 1 deletion b/‎golem-skills/tests/harness/src/run.ts‎
Lines changed: 108 additions & 1 deletion
@@ -8,7 +8,6 @@ import { SkillWatcher } from "./watcher.js";
 import { evaluate, ExpectSchema, type AssertionContext } from "./assertions.js";
 
 export const DEFAULT_STEP_TIMEOUT_SECONDS = 300;
-
 // --- Schemas ---
 
 const InvokeSchema = z.object({
@@ -39,6 +38,12 @@ const DeleteAgentSchema = z.object({
   name: z.string(),
 });
 
+// Shape of an `only_if` / `skip_if` block on a step. Every field is an
+// optional string; which of them are present decides what gets compared.
+const StepConditionSchema = z
+  .object({
+    agent: z.string(),
+    language: z.string(),
+    os: z.string(),
+  })
+  .partial();
+
 const ACTION_FIELDS = [
   "prompt",
   "invoke",
@@ -71,6 +76,8 @@ const StepSpecSchema = z
     trigger: TriggerSchema.optional(),
     create_agent: CreateAgentSchema.optional(),
     delete_agent: DeleteAgentSchema.optional(),
+    only_if: StepConditionSchema.optional(),
+    skip_if: StepConditionSchema.optional(),
   })
   .refine(
     (step) => {
@@ -94,7 +101,6 @@ const SettingsSchema = z
     cleanup: z.boolean().optional(),
   })
   .optional();
-
 const PrerequisitesSchema = z
   .object({
     env: z.record(z.string()).optional(),
@@ -120,6 +126,8 @@ interface StepCommon {
     deploy?: boolean;
   };
   expect?: z.infer<typeof ExpectSchema>;
+  only_if?: StepCondition;
+  skip_if?: StepCondition;
 }
 
 type InvokeSpec = { agent: string; function: string; args?: string };
@@ -248,6 +256,57 @@ export interface ScenarioRunResult {
 
+/** Optional settings passed to a `ScenarioExecutor`. */
 export interface ScenarioExecutorOptions {
   globalTimeoutSeconds?: number;
+  /**
+   * Agent of the current run. Exposed to steps as the `{{agent}}` template
+   * variable and compared against the `agent` field of step conditions.
+   */
+  agent?: string;
+  /**
+   * Language of the current run. Exposed to steps as the `{{language}}`
+   * template variable and compared against the `language` field of step
+   * conditions.
+   */
+  language?: string;
+  /**
+   * Checked before each step starts; once aborted, the remaining steps of the
+   * scenario are not started.
+   */
+  abortSignal?: AbortSignal;
+}
+
+// --- Template variable substitution ---
+
+/**
+ * Replaces every `{{name}}` placeholder in `text` with the matching entry of
+ * `variables`.
+ *
+ * Placeholder names are runs of word characters (`\w+`). A placeholder whose
+ * name is not an own key of `variables` is left in the text untouched, so an
+ * unknown template stays visible instead of collapsing to an empty string.
+ *
+ * @param text - Text that may contain `{{name}}` placeholders.
+ * @param variables - Placeholder names mapped to their replacement values.
+ * @returns `text` with all known placeholders substituted.
+ */
+export function substituteVariables(
+  text: string,
+  variables: Record<string, string>,
+): string {
+  return text.replace(/\{\{(\w+)\}\}/g, (match, name: string) => {
+    // Only own keys count: a plain index lookup would also find members
+    // inherited from Object.prototype (`{{constructor}}`, `{{toString}}`,
+    // `{{__proto__}}`) and splice their string form into the result.
+    if (!Object.prototype.hasOwnProperty.call(variables, name)) return match;
+    return variables[name] ?? match;
+  });
+}
+
+// --- Conditional step execution ---
+
+/**
+ * Condition attached to a step through `only_if` or `skip_if`. Only the
+ * fields that are set (and non-empty) take part in the comparison.
+ */
+export interface StepCondition {
+  /** Compared for exact equality with the agent of the current run. */
+  agent?: string;
+  /** Compared for exact equality with the language of the current run. */
+  language?: string;
+  /**
+   * Compared with the normalized platform of the current run, where `darwin`
+   * is reported as `macos` and `win32` as `windows`.
+   */
+  os?: string;
+}
+
+/**
+ * Maps a platform identifier onto the OS name used in step conditions:
+ * `darwin` becomes `macos`, `win32` becomes `windows`, and every other value
+ * is returned as-is.
+ */
+function normalizePlatform(platform: string): string {
+  switch (platform) {
+    case "darwin":
+      return "macos";
+    case "win32":
+      return "windows";
+    default:
+      return platform;
+  }
+}
+
+/**
+ * Decides whether a step should execute in the current run.
+ *
+ * - `only_if`: every field that is set must match the context, otherwise the
+ *   step is skipped.
+ * - `skip_if`: the step is skipped as soon as any field that is set matches
+ *   the context.
+ *
+ * Unset or empty condition fields are ignored. OS names are normalized on
+ * both sides, so a condition may say `darwin` or `macos` (likewise `win32`
+ * or `windows`) and select the same platform.
+ *
+ * @param step - Step whose `only_if` / `skip_if` conditions are evaluated.
+ * @param context - Agent, language and platform of the current run.
+ * @returns `true` when the step should run, `false` when it should be skipped.
+ */
+export function shouldRunStep(
+  step: StepSpec,
+  context: { agent?: string; language?: string; os: string },
+): boolean {
+  const normalizedOs = normalizePlatform(context.os);
+  // Normalize the condition value too; comparing a raw `darwin` / `win32`
+  // against the normalized context would never match.
+  const osMatches = (wanted: string): boolean =>
+    normalizePlatform(wanted) === normalizedOs;
+
+  if (step.only_if) {
+    const cond = step.only_if;
+    if (cond.agent && cond.agent !== context.agent) return false;
+    if (cond.language && cond.language !== context.language) return false;
+    if (cond.os && !osMatches(cond.os)) return false;
+  }
+
+  if (step.skip_if) {
+    const cond = step.skip_if;
+    if (cond.agent && cond.agent === context.agent) return false;
+    if (cond.language && cond.language === context.language) return false;
+    if (cond.os && osMatches(cond.os)) return false;
+  }
+
+  return true;
 }
 
 export class ScenarioExecutor {
@@ -271,6 +330,64 @@ export class ScenarioExecutor {
     this.options = options ?? {};
   }
 
+  /**
+   * Assembles the template variables available to a scenario's steps:
+   * `workspace` and `scenario` are always present, `agent` and `language`
+   * only when the corresponding option is set to a non-empty value.
+   */
+  private buildVariables(scenarioName: string): Record<string, string> {
+    const vars: Record<string, string> = {
+      workspace: this.workspace,
+      scenario: scenarioName,
+    };
+    const optionalEntries: Array<[string, string | undefined]> = [
+      ["agent", this.options.agent],
+      ["language", this.options.language],
+    ];
+    for (const [key, value] of optionalEntries) {
+      if (value) {
+        vars[key] = value;
+      }
+    }
+    return vars;
+  }
+
+  /**
+   * Returns a copy of `step` in which `{{name}}` placeholders in the step's
+   * string fields have been replaced using `variables`.
+   *
+   * Fields covered: `prompt`; `shell.command`, `shell.args`, `shell.cwd`;
+   * `agent`, `function` and `args` of `invoke` and `trigger`; `name` of
+   * `create_agent` and `delete_agent`. Every other field, at the top level
+   * and inside those nested specs, is carried over unchanged. The input step
+   * is not mutated.
+   */
+  private substituteStepVariables(
+    step: StepSpec,
+    variables: Record<string, string>,
+  ): StepSpec {
+    const sub = (s: string | undefined) =>
+      s ? substituteVariables(s, variables) : s;
+    const subArr = (arr: string[] | undefined) =>
+      arr?.map((s) => substituteVariables(s, variables));
+
+    // Each nested spec is spread first so that fields not listed here survive
+    // the copy; rebuilding from a fixed field list would silently drop them.
+    return {
+      ...step,
+      prompt: sub(step.prompt),
+      shell: step.shell
+        ? {
+            ...step.shell,
+            command: substituteVariables(step.shell.command, variables),
+            args: subArr(step.shell.args),
+            cwd: sub(step.shell.cwd),
+          }
+        : step.shell,
+      invoke: step.invoke
+        ? {
+            ...step.invoke,
+            agent: substituteVariables(step.invoke.agent, variables),
+            function: substituteVariables(step.invoke.function, variables),
+            args: sub(step.invoke.args),
+          }
+        : step.invoke,
+      trigger: step.trigger
+        ? {
+            ...step.trigger,
+            agent: substituteVariables(step.trigger.agent, variables),
+            function: substituteVariables(step.trigger.function, variables),
+            args: sub(step.trigger.args),
+          }
+        : step.trigger,
+      create_agent: step.create_agent
+        ? {
+            ...step.create_agent,
+            name: substituteVariables(step.create_agent.name, variables),
+          }
+        : step.create_agent,
+      delete_agent: step.delete_agent
+        ? {
+            ...step.delete_agent,
+            name: substituteVariables(step.delete_agent.name, variables),
+          }
+        : step.delete_agent,
+    } as StepSpec;
+  }
+
   async execute(spec: ScenarioSpec): Promise<ScenarioRunResult> {
     const results: StepResult[] = [];
     const savedEnv: Record<string, string | undefined> = {};
@@ -304,11 +421,37 @@ export class ScenarioExecutor {
 
     // Build extra env for commands from settings
     const commandEnv = this.buildCommandEnv(spec);
+    const variables = this.buildVariables(spec.name);
+    const conditionContext = {
+      agent: this.options.agent,
+      language: this.options.language,
+      os: process.platform,
+    };
 
     const startTime = Date.now();
     let isFirstPrompt = true;
     try {
-      for (const step of spec.steps) {
+      for (const originalStep of spec.steps) {
+        // Check abort signal
+        if (this.options.abortSignal?.aborted) break;
+
+        // Substitute template variables
+        const step = this.substituteStepVariables(originalStep, variables);
+
+        // Conditional execution
+        if (!shouldRunStep(step, conditionContext)) {
+          console.log(
+            `Step ${step.id ?? "(unnamed)"}: skipped (condition not met)`,
+          );
+          results.push({
+            step: originalStep,
+            success: true,
+            durationSeconds: 0,
+            expectedSkills: step.expectedSkills ?? [],
+            activatedSkills: [],
+          });
+          continue;
+        }
         const stepStartTime = Date.now();
         let stepSuccess = true;
         const stepErrors: string[] = [];
@@ -601,7 +744,7 @@ export class ScenarioExecutor {
         }
 
         results.push({
-          step,
+          step: originalStep,
           success: stepSuccess,
           durationSeconds: (Date.now() - stepStartTime) / 1000,
           expectedSkills: step.expectedSkills ?? [],
 
@@ -60,6 +60,7 @@ async function main() {
       timeout: { type: "string" },
       skills: { type: "string", default: "../../skills" },
       help: { type: "boolean", short: "h", default: false },
+      "dry-run": { type: "boolean", default: false },
     },
   });
 
@@ -70,6 +71,7 @@ async function main() {
     timeout,
     skills: skillsDirRel,
     help,
+    "dry-run": dryRun,
   } = values;
   const agentArg = values.agent ?? "all";
   const languageArg = values.language ?? "all";
@@ -89,6 +91,7 @@ Options:
   --output <dir>        Results output directory (default: ./results)
   --timeout <seconds>   Global timeout per scenario step in seconds (default: ${DEFAULT_STEP_TIMEOUT_SECONDS})
   --skills <dir>        Path to skills directory (default: ../../skills)
+  --dry-run             Validate scenarios and print step summaries without executing
   -h, --help            Show this help message
 `.trim();
 
@@ -145,6 +148,64 @@ Options:
     (f) => f.endsWith(".yaml") || f.endsWith(".yml"),
   );
 
+  // Dry-run mode: validate and print step summaries, then exit
+  if (dryRun) {
+    console.log(chalk.bold("=== Dry Run ==="));
+    for (const file of scenarioFiles) {
+      const spec = await ScenarioLoader.load(path.join(scenariosDir, file));
+      if (scenarioFilter && spec.name !== scenarioFilter) continue;
+
+      console.log(chalk.blue(`\nScenario: ${spec.name}`));
+      console.log(`  Steps: ${spec.steps.length}`);
+      for (let i = 0; i < spec.steps.length; i++) {
+        const step = spec.steps[i];
+        const label = step.id ?? `step-${i + 1}`;
+        const promptPreview = step.prompt
+          ? step.prompt.length > 60
+            ? step.prompt.slice(0, 57) + "..."
+            : step.prompt
+          : "(no prompt)";
+        const skills = step.expectedSkills?.join(", ") || "(none)";
+        const timeoutVal =
+          step.timeout ?? spec.settings?.timeout_per_subprompt ?? "default";
+        const conditions: string[] = [];
+        if (step.only_if) {
+          conditions.push(`only_if: ${JSON.stringify(step.only_if)}`);
+        }
+        if (step.skip_if) {
+          conditions.push(`skip_if: ${JSON.stringify(step.skip_if)}`);
+        }
+        console.log(`  [${label}] ${promptPreview}`);
+        console.log(
+          `    skills: ${skills} | timeout: ${typeof timeoutVal === "number" ? `${timeoutVal}s` : timeoutVal}`,
+        );
+        if (conditions.length > 0) {
+          console.log(`    conditions: ${conditions.join(", ")}`);
+        }
+      }
+    }
+    console.log(chalk.green("\nAll scenarios validated successfully."));
+    return;
+  }
+
+  // Set up graceful Ctrl+C handling
+  const abortController = new AbortController();
+  let interrupted = false;
+
+  process.on("SIGINT", () => {
+    if (interrupted) {
+      console.log(chalk.red("\nForce exit."));
+      process.exit(130);
+    }
+    interrupted = true;
+    console.log(
+      chalk.yellow(
+        "\nInterrupted. Finishing current step and writing partial results... (press Ctrl+C again to force exit)",
+      ),
+    );
+    abortController.abort();
+  });
+
   const scenarioReports: ScenarioReport[] = [];
   let hasFailures = false;
 
@@ -159,6 +220,14 @@ Options:
       );
 
       for (const file of scenarioFiles) {
+        // Check if interrupted before starting next scenario
+        if (interrupted) {
+          console.log(
+            chalk.yellow(`Skipping remaining scenarios due to interruption.`),
+          );
+          break;
+        }
+
         const spec = await ScenarioLoader.load(path.join(scenariosDir, file));
 
         if (scenarioFilter && spec.name !== scenarioFilter) continue;
@@ -178,7 +247,12 @@ Options:
           watcher,
           workspace,
           skillsDir,
-          { globalTimeoutSeconds },
+          {
+            globalTimeoutSeconds,
+            agent: currentAgent,
+            language: currentLanguage,
+            abortSignal: abortController.signal,
+          },
         );
 
         const scenarioResult = await executor.execute(spec);
@@ -265,6 +339,39 @@ Options:
     const summaryPath = path.join(resultsDir, "summary.json");
     await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));
 
+    // GitHub Actions job summary
+    const ghSummaryPath = process.env["GITHUB_STEP_SUMMARY"];
+    if (ghSummaryPath) {
+      const lines: string[] = [];
+      lines.push("## Skill Test Results");
+      lines.push("");
+      lines.push("| Scenario | Agent | Language | Status | Duration |");
+      lines.push("|----------|-------|----------|--------|----------|");
+      for (const r of scenarioReports) {
+        const icon = r.status === "pass" ? "\u2705" : "\u274c";
+        lines.push(
+          `| ${r.scenario} | ${r.matrix.agent} | ${r.matrix.language} | ${icon} ${r.status} | ${r.durationSeconds.toFixed(1)}s |`,
+        );
+      }
+      lines.push("");
+      lines.push(
+        `**Total:** ${totalScenarios} | **Passed:** ${passed} | **Failed:** ${failed} | **Duration:** ${totalDuration.toFixed(1)}s`,
+      );
+
+      if (worstFailures.length > 0) {
+        lines.push("");
+        lines.push("### Failures");
+        for (const f of worstFailures) {
+          const truncatedError =
+            f.error.length > 200 ? f.error.slice(0, 197) + "..." : f.error;
+          lines.push(`- **${f.scenario}**: ${truncatedError}`);
+        }
+      }
+      lines.push("");
+
+      await fs.appendFile(ghSummaryPath, lines.join("\n"));
+    }
+
     // Print summary
     console.log("");
     console.log(chalk.bold("=== Test Summary ==="));