feat: load review methodology from factory-mono builtin skill

factory-nizar · factory-droid[bot] · factory-nizar · commit bcc5b7a4e5b9 · 2026-03-27T14:46:10.000-07:00
Load the review skill's shared methodology from factory-mono's builtin-skills/review/SKILL.md instead of keeping it inline in the CI prompt templates. The skill is loaded at runtime via local plugin cache or GitHub fallback. The shared methodology (bug patterns, reporting gate, confidence calibration, deduplication, analysis discipline) is extracted via BEGIN_SHARED_METHODOLOGY / END_SHARED_METHODOLOGY markers and injected into both candidate and validator prompts. Suggestion block rules remain controlled by the include_suggestions toggle at the CI template level. Depends on: Factory-AI/factory-mono#11498 Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com>
diff --git a/src/create-prompt/index.ts b/src/create-prompt/index.ts
@@ -305,6 +305,7 @@ export type PromptCreationOptions = {
   reviewArtifacts?: ReviewArtifacts;
   outputFilePath?: string;
   includeSuggestions?: boolean;
+  reviewSkillContent?: string;
 };
 
 export async function createPrompt({
@@ -320,6 +321,7 @@ export async function createPrompt({
   reviewArtifacts,
   outputFilePath,
   includeSuggestions,
+  reviewSkillContent,
 }: PromptCreationOptions) {
   try {
     const droidCommentId = commentId?.toString();
@@ -340,6 +342,10 @@ export async function createPrompt({
       preparedContext.includeSuggestions = includeSuggestions;
     }
 
+    if (reviewSkillContent) {
+      preparedContext.reviewSkillContent = reviewSkillContent;
+    }
+
     await mkdir(`${process.env.RUNNER_TEMP || "/tmp"}/droid-prompts`, {
       recursive: true,
     });
diff --git a/src/create-prompt/templates/review-candidates-prompt.ts b/src/create-prompt/templates/review-candidates-prompt.ts
@@ -1,3 +1,4 @@
+import { formatSkillSection } from "../../utils/load-skill";
 import type { PreparedContext } from "../types";
 
 export function generateReviewCandidatesPrompt(
@@ -52,7 +53,7 @@ export function generateReviewCandidatesPrompt(
   return `You are a senior staff software engineer and expert code reviewer.
 
 Your task: Review PR #${prNumber} in ${repoFullName} and generate a JSON file with **high-confidence, actionable** review comments that pinpoint genuine issues.
-
+${formatSkillSection(context.reviewSkillContent)}
 <context>
 Repo: ${repoFullName}
 PR Number: ${prNumber}
@@ -76,19 +77,7 @@ Precomputed data files:
 <review_guidelines>
 - You are currently checked out to the PR branch.
 - Review ALL modified files in the PR branch.
-- Focus on: functional correctness, syntax errors, logic bugs, broken dependencies/contracts/tests, security issues, and performance problems.
-- High-signal bug patterns to actively check for (only comment when evidenced in the diff):
-  - Null/undefined/Optional dereferences; missing-key errors on untrusted/external dict/JSON payloads
-  - Resource leaks (unclosed files/streams/connections; missing cleanup on error paths)
-  - Injection vulnerabilities (SQL injection, XSS, command/template injection) and auth/security invariant violations
-  - OAuth/CSRF invariants: state must be per-flow unpredictable and validated; avoid deterministic/predictable state or missing state checks
-  - Concurrency/race/atomicity hazards (TOCTOU, lost updates, unsafe shared state, process/thread lifecycle bugs)
-  - Missing error handling for critical operations (network, persistence, auth, migrations, external APIs)
-  - Wrong-variable/shadowing mistakes; contract mismatches (serializer/validated_data, interfaces/abstract methods)
-  - Type-assumption bugs (e.g., numeric ops on datetime/strings, ordering key type mismatches)
-  - Offset/cursor/pagination semantic mismatches (off-by-one, prev/next behavior, commit semantics)
 - Do NOT duplicate comments already in \`${commentsPath}\`.
-- Only flag issues you are confident about—avoid speculative or stylistic nitpicks.
 </review_guidelines>
 
 <triage_phase>
diff --git a/src/create-prompt/templates/review-validator-prompt.ts b/src/create-prompt/templates/review-validator-prompt.ts
@@ -1,3 +1,4 @@
+import { formatSkillSection } from "../../utils/load-skill";
 import type { PreparedContext } from "../types";
 
 export function generateReviewValidatorPrompt(
@@ -43,7 +44,7 @@ export function generateReviewValidatorPrompt(
   return `You are validating candidate review comments for PR #${prNumber} in ${repoFullName}.
 
 IMPORTANT: This is Phase 2 (validator) of a two-pass review pipeline.
-
+${formatSkillSection(context.reviewSkillContent)}
 ### Context
 
 * Repo: ${repoFullName}
@@ -101,20 +102,11 @@ Read:
 
 ## Phase 2: Validate candidates
 
-Apply the same Reporting Gate as review:
-
-### Approve ONLY if at least one is true
-* Definite runtime failure
-* Incorrect logic with a concrete trigger path and wrong outcome
-* Security vulnerability with realistic exploit
-* Data corruption/loss
-* Breaking contract change (discoverable in code/tests)
+Apply the Reporting Gate, confidence calibration, and deduplication rules from the review methodology above.
 
-Reject if:
-* It's speculative / "might" without a concrete trigger
-* It's stylistic / naming / formatting
+Additionally reject if:
 * It's not anchored to a valid changed line
-* It's already reported (dedupe against existing comments)
+* It's already reported (dedupe against existing comments in \`${commentsPath}\`)
 
 ### Deduplication (STRICT)
 
diff --git a/src/create-prompt/types.ts b/src/create-prompt/types.ts
@@ -119,4 +119,5 @@ export type PreparedContext = CommonFields & {
   reviewArtifacts?: ReviewArtifacts;
   outputFilePath?: string;
   includeSuggestions?: boolean;
+  reviewSkillContent?: string;
 };
diff --git a/src/entrypoints/generate-review-prompt.ts b/src/entrypoints/generate-review-prompt.ts
@@ -15,6 +15,7 @@ import { prepareMcpTools } from "../mcp/install-mcp-server";
 import { generateReviewCandidatesPrompt } from "../create-prompt/templates/review-candidates-prompt";
 import { generateSecurityReviewPrompt } from "../create-prompt/templates/security-review-prompt";
 import { normalizeDroidArgs, parseAllowedTools } from "../utils/parse-tools";
+import { loadSkill } from "../utils/load-skill";
 
 async function run() {
   try {
@@ -98,6 +99,9 @@ async function run() {
     const outputFilePath = process.env.DROID_OUTPUT_FILE || undefined;
     const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false";
 
+    const reviewSkillContent =
+      reviewType === "code" ? await loadSkill("review") : undefined;
+
     await createPrompt({
       githubContext: context,
       commentId,
@@ -110,6 +114,7 @@ async function run() {
       reviewArtifacts,
       outputFilePath,
       includeSuggestions,
+      reviewSkillContent,
     });
 
     // Set run type
diff --git a/src/tag/commands/review-validator.ts b/src/tag/commands/review-validator.ts
@@ -9,6 +9,7 @@ import { prepareMcpTools } from "../../mcp/install-mcp-server";
 import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools";
 import type { PrepareResult } from "../../prepare/types";
 import { generateReviewValidatorPrompt } from "../../create-prompt/templates/review-validator-prompt";
+import { loadSkill } from "../../utils/load-skill";
 
 export async function prepareReviewValidatorMode({
   context,
@@ -46,6 +47,7 @@ export async function prepareReviewValidatorMode({
   };
 
   const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false";
+  const reviewSkillContent = await loadSkill("review");
 
   await createPrompt({
     githubContext: context,
@@ -59,6 +61,7 @@ export async function prepareReviewValidatorMode({
     generatePrompt: generateReviewValidatorPrompt,
     reviewArtifacts,
     includeSuggestions,
+    reviewSkillContent,
   });
 
   core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review");
diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts
@@ -9,6 +9,7 @@ import { createInitialComment } from "../../github/operations/comments/create-in
 import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools";
 import { isEntityContext } from "../../github/context";
 import { generateReviewCandidatesPrompt } from "../../create-prompt/templates/review-candidates-prompt";
+import { loadSkill } from "../../utils/load-skill";
 import type { Octokits } from "../../github/api/client";
 import type { PrepareResult } from "../../prepare/types";
 
@@ -85,6 +86,7 @@ export async function prepareReviewMode({
   });
 
   const includeSuggestions = process.env.INCLUDE_SUGGESTIONS !== "false";
+  const reviewSkillContent = await loadSkill("review");
 
   await createPrompt({
     githubContext: context,
@@ -98,6 +100,7 @@ export async function prepareReviewMode({
     generatePrompt: generateReviewCandidatesPrompt,
     reviewArtifacts,
     includeSuggestions,
+    reviewSkillContent,
   });
   core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review");
 
diff --git a/src/utils/load-skill.ts b/src/utils/load-skill.ts
@@ -0,0 +1,114 @@
+import { readFile, readdir } from "fs/promises";
+import { homedir } from "os";
+import { resolve, join } from "path";
+
+const MONO_REPO = "Factory-AI/factory-mono"; // GitHub repo the fallback fetch reads SKILL.md from
+const MONO_BRANCH = "feat/review-builtin-skill"; // NOTE(review): feature branch — presumably temporary until factory-mono#11498 merges; confirm
+// Markers delimiting the shared-methodology section inside a SKILL.md file.
+const SHARED_BEGIN = "<!-- BEGIN_SHARED_METHODOLOGY -->";
+const SHARED_END = "<!-- END_SHARED_METHODOLOGY -->";
+
+/**
+ * Wrap a skill's shared methodology in a `<code_review_methodology>` element
+ * for embedding in a CI prompt. Only the marker-delimited portion is used, so
+ * the template's own CI-specific instructions stay in charge of execution.
+ */
+export function formatSkillSection(skillContent: string | undefined): string {
+  // Absent or empty skill content contributes nothing to the prompt.
+  if (!skillContent) {
+    return "";
+  }
+  const body = extractSharedMethodology(skillContent);
+  const lines = ["", "<code_review_methodology>", body, "</code_review_methodology>", ""];
+  return lines.join("\n");
+}
+
+/**
+ * Extract the shared methodology section from a skill's content: the trimmed
+ * text between the first BEGIN_SHARED_METHODOLOGY marker and the first
+ * END_SHARED_METHODOLOGY marker after it. Returns `content` unchanged if no such pair exists.
+ */
+export function extractSharedMethodology(content: string): string {
+  const beginIdx = content.indexOf(SHARED_BEGIN);
+  if (beginIdx === -1) return content;
+  const bodyStart = beginIdx + SHARED_BEGIN.length;
+  // Search from bodyStart so a stray END marker earlier in the file is ignored.
+  const endIdx = content.indexOf(SHARED_END, bodyStart);
+  return endIdx === -1 ? content : content.slice(bodyStart, endIdx).trim();
+}
+
+/**
+ * Look for `<skillName>/SKILL.md` in the local core plugin cache, i.e. under
+ *   ~/.factory/plugins/cache/factory-plugins/core/<hash>/skills/<name>/SKILL.md
+ * Returns the trimmed text of the first non-empty readable copy, or undefined
+ * when no such copy exists.
+ */
+async function loadSkillFromCache(
+  skillName: string,
+): Promise<string | undefined> {
+  const pluginRoot = resolve(
+    process.env.HOME || homedir(),
+    ".factory/plugins/cache/factory-plugins/core",
+  );
+
+  // A missing or unreadable cache directory simply means "nothing cached".
+  const versions = await readdir(pluginRoot).catch(() => undefined);
+  if (versions === undefined) {
+    return undefined;
+  }
+
+  for (const version of versions) {
+    const candidate = join(pluginRoot, version, "skills", skillName, "SKILL.md");
+    // Unreadable candidates are skipped, not treated as fatal.
+    const raw = await readFile(candidate, "utf8").catch(() => undefined);
+    const text = raw?.trim();
+    if (text) {
+      console.log(
+        `Loaded skill ${skillName} from ${candidate} (${text.length} bytes)`,
+      );
+      return text;
+    }
+  }
+
+  return undefined;
+}
+
+/**
+ * Fetch a skill from the factory-mono GitHub repo; fallback for when the local
+ * plugin cache is unavailable (e.g. CI). Gives up after 10s, returns undefined on any failure.
+ */
+async function loadSkillFromGitHub(
+  skillName: string,
+): Promise<string | undefined> {
+  const url = `https://raw.githubusercontent.com/${MONO_REPO}/${MONO_BRANCH}/apps/cli/builtin-skills/${skillName}/SKILL.md`;
+  try {
+    const response = await fetch(url, { signal: AbortSignal.timeout(10_000) });
+    if (!response.ok) return undefined;
+    const trimmed = (await response.text()).trim();
+    if (!trimmed) return undefined;
+    console.log(
+      `Loaded skill ${skillName} from GitHub (${trimmed.length} bytes)`,
+    );
+    return trimmed;
+  } catch {
+    // Network error, timeout, or body read failure: treat as "not available".
+    return undefined;
+  }
+}
+
+/**
+ * Resolve a skill's content by name, preferring the local plugin cache and
+ * only fetching from the factory-mono GitHub repo when the cache has no copy.
+ * @throws Error when neither source yields non-empty content.
+ */
+export async function loadSkill(skillName: string): Promise<string> {
+  const content =
+    (await loadSkillFromCache(skillName)) ||
+    (await loadSkillFromGitHub(skillName));
+  if (!content) {
+    throw new Error(
+      `Required skill "${skillName}" not found in local plugin cache or on GitHub (${MONO_REPO}).`,
+    );
+  }
+  return content;
+}
diff --git a/test/integration/review-flow.test.ts b/test/integration/review-flow.test.ts
@@ -8,6 +8,7 @@ import * as createInitial from "../../src/github/operations/comments/create-init
 import * as mcpInstaller from "../../src/mcp/install-mcp-server";
 import * as actorValidation from "../../src/github/validation/actor";
 import * as promptModule from "../../src/create-prompt";
+import * as skillLoader from "../../src/utils/load-skill";
 import * as core from "@actions/core";
 import * as childProcess from "node:child_process";
 
@@ -23,6 +24,7 @@ describe("review command integration", () => {
   let exportVarSpy: ReturnType<typeof spyOn>;
   let promptSpy: ReturnType<typeof spyOn>;
   let execSyncSpy: ReturnType<typeof spyOn>;
+  let loadSkillSpy: ReturnType<typeof spyOn>;
 
   beforeEach(async () => {
     tmpDir = await mkdtemp(path.join(os.tmpdir(), "review-int-"));
@@ -39,6 +41,9 @@ describe("review command integration", () => {
     promptSpy = spyOn(promptModule, "createPrompt").mockResolvedValue();
     setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {});
     exportVarSpy = spyOn(core, "exportVariable").mockImplementation(() => {});
+    loadSkillSpy = spyOn(skillLoader, "loadSkill").mockResolvedValue(
+      "mock skill content",
+    );
 
     execSyncSpy = spyOn(childProcess, "execSync").mockImplementation(((
       cmd: string,
@@ -60,6 +65,7 @@ describe("review command integration", () => {
     setOutputSpy.mockRestore();
     exportVarSpy.mockRestore();
     execSyncSpy.mockRestore();
+    loadSkillSpy.mockRestore();
 
     if (process.env.RUNNER_TEMP) {
       await rm(process.env.RUNNER_TEMP, { recursive: true, force: true });
diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts
@@ -6,6 +6,7 @@ import { createMockContext } from "../../mockContext";
 import * as promptModule from "../../../src/create-prompt";
 import * as mcpInstaller from "../../../src/mcp/install-mcp-server";
 import * as comments from "../../../src/github/operations/comments/create-initial";
+import * as skillLoader from "../../../src/utils/load-skill";
 import * as childProcess from "child_process";
 import * as fsPromises from "fs/promises";
 
@@ -39,12 +40,16 @@ describe("prepareReviewMode", () => {
   let execSyncSpy: ReturnType<typeof spyOn>;
   let writeFileSpy: ReturnType<typeof spyOn>;
   let mkdirSpy: ReturnType<typeof spyOn>;
+  let loadSkillSpy: ReturnType<typeof spyOn>;
 
   beforeEach(() => {
     process.env.DROID_ARGS = "";
     delete process.env.REVIEW_MODEL;
     process.env.RUNNER_TEMP = "/tmp/test-runner";
 
+    loadSkillSpy = spyOn(skillLoader, "loadSkill").mockResolvedValue(
+      "mock skill content",
+    );
     promptSpy = spyOn(promptModule, "createPrompt").mockResolvedValue();
     mcpSpy = spyOn(mcpInstaller, "prepareMcpTools").mockResolvedValue(
       "mock-config",
@@ -84,6 +89,7 @@ describe("prepareReviewMode", () => {
     execSyncSpy.mockRestore();
     writeFileSpy.mockRestore();
     mkdirSpy.mockRestore();
+    loadSkillSpy.mockRestore();
     process.env.DROID_ARGS = originalArgs;
     if (originalReviewModel !== undefined) {
       process.env.REVIEW_MODEL = originalReviewModel;
diff --git a/test/utils/load-skill.test.ts b/test/utils/load-skill.test.ts