Skip to content

Commit 78c84c5

Browse files
OAGr and claude committed
fix(groundskeeper): prevent duplicate wellness issues, fix auto-update env vars, standardize API keys
Three fixes for groundskeeper operational issues:

1. Health-check duplicate issue creation: Stagger wellness workflow cron schedules (+5min, +10min), use a stable issue title without a timestamp, and add a title-based fallback search and a post-creation dedup guard.

2. Auto-update missing env vars: Pass LONGTERMWIKI_SERVER_URL and scoped API keys to the CI pipeline step (previously they were set only in the "Build data layer" step). Also switch paranoid review to use parseJsonFromLlm for better JSON recovery from truncated LLM responses.

3. API key consistency: Migrate session-sweep and the groundskeeper wiki-server client from WIKI_SERVER_API_KEY to LONGTERMWIKI_PROJECT_KEY with a LONGTERMWIKI_SERVER_API_KEY fallback, matching the snapshot-retention pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b518699 commit 78c84c5

File tree

10 files changed

+149
-36
lines changed

10 files changed

+149
-36
lines changed

.claude/audits.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,14 @@ post_merge:
165165
checked_date: "2026-03-05"
166166
notes: "Verified: all audits subcommands work (list, report, run-auto, check). YAML schema valid. 7 audits + 2 post-merge items tracked correctly. run-auto executes hybrid check_commands."
167167

168+
- id: groundskeeper-fixes-post-merge
169+
pr: 1813
170+
merged: "2026-03-06"
171+
claim: "Staggered wellness cron schedules prevent duplicate issue creation; auto-update env vars enable session log writing; groundskeeper API key migration works"
172+
how_to_verify: "After next 8AM/PM UTC wellness run, check that only 1 wellness issue is created (not duplicates). Check next auto-update run for no LONGTERMWIKI_SERVER_URL warnings. Check groundskeeper logs for no API key errors."
173+
status: pending
174+
deadline: "2026-03-20"
175+
168176
- id: reviewdog-ci-annotations
169177
pr: 1787
170178
merged: "2026-03-05"

.github/workflows/auto-update.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ jobs:
103103
FIRECRAWL_KEY: ${{ secrets.FIRECRAWL_KEY }}
104104
EXA_API_KEY: ${{ secrets.EXA_API_KEY }}
105105
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
106+
LONGTERMWIKI_SERVER_URL: ${{ secrets.LONGTERMWIKI_SERVER_URL }}
107+
LONGTERMWIKI_SERVER_API_KEY: ${{ secrets.LONGTERMWIKI_SERVER_API_KEY }}
108+
LONGTERMWIKI_PROJECT_KEY: ${{ secrets.LONGTERMWIKI_PROJECT_KEY }}
109+
LONGTERMWIKI_CONTENT_KEY: ${{ secrets.LONGTERMWIKI_CONTENT_KEY }}
106110
run: |
107111
ARGS="--budget=${{ inputs.budget || '30' }} --count=${{ inputs.count || '3' }} --verbose"
108112

.github/workflows/ci-pr-health.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ run-name: "CI & PR Health${{ vars.AUTOMATION_PAUSED == 'true' && ' [PAUSED]' ||
1414

1515
on:
1616
schedule:
17-
- cron: "0 8 * * *" # 8 AM UTC daily
18-
- cron: "0 20 * * *" # 8 PM UTC daily
17+
- cron: "10 8 * * *" # 8:10 AM UTC daily (staggered +10min from server-api-health to avoid duplicate issue creation)
18+
- cron: "10 20 * * *" # 8:10 PM UTC daily
1919
workflow_dispatch:
2020

2121
concurrency:

.github/workflows/frontend-data-health.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ name: Frontend & Data Health
1111

1212
on:
1313
schedule:
14-
- cron: "0 8 * * *" # 8 AM UTC daily
15-
- cron: "0 20 * * *" # 8 PM UTC daily
14+
- cron: "5 8 * * *" # 8:05 AM UTC daily (staggered +5min from server-api-health to avoid duplicate issue creation)
15+
- cron: "5 20 * * *" # 8:05 PM UTC daily
1616
workflow_dispatch:
1717

1818
concurrency:

apps/groundskeeper/src/tasks/session-sweep.test.ts

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,13 @@ describe("sessionSweep", () => {
6262

6363
beforeEach(() => {
6464
config = makeConfig();
65-
process.env["WIKI_SERVER_API_KEY"] = "test-key";
65+
process.env["LONGTERMWIKI_PROJECT_KEY"] = "test-key";
6666
});
6767

6868
afterEach(() => {
6969
vi.restoreAllMocks();
70-
delete process.env["WIKI_SERVER_API_KEY"];
70+
delete process.env["LONGTERMWIKI_PROJECT_KEY"];
71+
delete process.env["LONGTERMWIKI_SERVER_API_KEY"];
7172
});
7273

7374
it("returns success with no stale sessions message when swept=0", async () => {
@@ -101,15 +102,36 @@ describe("sessionSweep", () => {
101102
expect((calls[0][1] as RequestInit).method).toBe("POST");
102103
});
103104

104-
it("returns failure when API key is not set", async () => {
105-
delete process.env["WIKI_SERVER_API_KEY"];
105+
it("returns failure when no API key is set", async () => {
106+
delete process.env["LONGTERMWIKI_PROJECT_KEY"];
107+
delete process.env["LONGTERMWIKI_SERVER_API_KEY"];
106108

107109
const result = await sessionSweep(config);
108110

109111
expect(result.success).toBe(false);
110112
expect(result.summary).toContain("failed");
111113
});
112114

115+
it("uses LONGTERMWIKI_SERVER_API_KEY as fallback when project key is absent", async () => {
116+
delete process.env["LONGTERMWIKI_PROJECT_KEY"];
117+
process.env["LONGTERMWIKI_SERVER_API_KEY"] = "legacy-superkey";
118+
119+
vi.stubGlobal(
120+
"fetch",
121+
vi.fn().mockResolvedValue({
122+
ok: true,
123+
json: async () => ({ swept: 0, sessions: [] }),
124+
})
125+
);
126+
127+
const result = await sessionSweep(config);
128+
129+
expect(result.success).toBe(true);
130+
const calls = vi.mocked(fetch).mock.calls;
131+
const headers = (calls[0][1] as RequestInit).headers as Record<string, string>;
132+
expect(headers.Authorization).toBe("Bearer legacy-superkey");
133+
});
134+
113135
it("returns failure when sweep endpoint fails", async () => {
114136
vi.stubGlobal(
115137
"fetch",

apps/groundskeeper/src/tasks/session-sweep.ts

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,16 @@ async function callSweepEndpoint(
2727
timeoutHours: number,
2828
): Promise<SweepResponse | null> {
2929
const url = `${config.wikiServerUrl}/api/agent-sessions/sweep`;
30-
const apiKey = process.env["WIKI_SERVER_API_KEY"];
30+
// Use project-scoped key (preferred) or legacy superkey (fallback).
31+
// The /api/agent-sessions/* routes require `project` scope.
32+
const apiKey =
33+
process.env["LONGTERMWIKI_PROJECT_KEY"] ??
34+
process.env["LONGTERMWIKI_SERVER_API_KEY"];
3135

3236
if (!apiKey) {
33-
logger.warn("WIKI_SERVER_API_KEY not set, skipping sweep");
37+
logger.warn(
38+
"Neither LONGTERMWIKI_PROJECT_KEY nor LONGTERMWIKI_SERVER_API_KEY is set — skipping sweep",
39+
);
3440
return null;
3541
}
3642

apps/groundskeeper/src/wiki-server.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,15 @@ async function apiRequest<T>(
2323
body?: unknown,
2424
): Promise<ApiResult<T>> {
2525
const url = `${config.wikiServerUrl}${path}`;
26-
const apiKey = process.env["WIKI_SERVER_API_KEY"];
26+
// Use project-scoped key (preferred) or legacy superkey (fallback).
27+
// Do NOT use WIKI_SERVER_API_KEY — that name is ambiguous and may refer
28+
// to a different key scope. See snapshot-retention.ts for the same pattern.
29+
const apiKey =
30+
process.env["LONGTERMWIKI_PROJECT_KEY"] ??
31+
process.env["LONGTERMWIKI_SERVER_API_KEY"];
2732

2833
if (!apiKey) {
29-
return { ok: false, error: "WIKI_SERVER_API_KEY not set" };
34+
return { ok: false, error: "Neither LONGTERMWIKI_PROJECT_KEY nor LONGTERMWIKI_SERVER_API_KEY is set" };
3035
}
3136

3237
try {

crux/auto-update/ci-orchestrate.ts

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import {
3232
import { computeRiskScores } from './ci-risk-scores.ts';
3333
import { runContentChecks } from './ci-content-checks.ts';
3434
import { buildPrBody } from './ci-pr-body.ts';
35+
import { parseJsonFromLlm } from '../lib/json-parsing.ts';
3536

3637
// ── Types ────────────────────────────────────────────────────────────────────
3738

@@ -255,27 +256,18 @@ async function runParanoidReview(verbose: boolean): Promise<{ alerts: ReviewAler
255256
continue;
256257
}
257258

258-
// Extract JSON line (guards against pnpm/dotenv preamble)
259-
const jsonLine = resultRaw
260-
.split('\n')
261-
.filter(line => line.trim().startsWith('{'))
262-
.pop();
263-
264-
if (!jsonLine) {
265-
console.warn(`::warning::${pageId} -- review produced no JSON output`);
266-
continue;
267-
}
268-
269-
let result: {
259+
// Parse JSON from LLM output (handles code fences, preamble, truncation)
260+
const result = parseJsonFromLlm<{
270261
needsReResearch?: boolean;
271262
gapCount?: number;
272263
overallAssessment?: string;
273264
error?: string;
274-
};
275-
try {
276-
result = JSON.parse(jsonLine);
277-
} catch {
278-
console.warn(`::warning::${pageId} -- review JSON parse failed`);
265+
}>(resultRaw, `paranoid-review:${pageId}`, () => {
266+
console.warn(`::warning::${pageId} -- review JSON could not be parsed, skipping`);
267+
return { error: 'unparseable' };
268+
});
269+
270+
if (result.error === 'unparseable') {
279271
continue;
280272
}
281273

crux/health/wellness-report.test.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import { describe, it, expect } from 'vitest';
1212
import type { CheckResult } from './health-check.ts';
13-
import { buildWellnessReport } from './wellness-report.ts';
13+
import { buildWellnessReport, WELLNESS_ISSUE_TITLE } from './wellness-report.ts';
1414

1515
function makeCheck(overrides: Partial<CheckResult> = {}): CheckResult {
1616
return {
@@ -139,3 +139,12 @@ describe('buildWellnessReport', () => {
139139
expect(report.markdownSummary).not.toContain('<details>');
140140
});
141141
});
142+
143+
describe('WELLNESS_ISSUE_TITLE', () => {
144+
it('is a stable string without a timestamp', () => {
145+
// The title must be stable (no timestamp) so that concurrent workflow
146+
// runs can find each other's issues and avoid duplicates.
147+
expect(WELLNESS_ISSUE_TITLE).toBe('System wellness check failing');
148+
expect(WELLNESS_ISSUE_TITLE).not.toMatch(/\d{4}-\d{2}-\d{2}/);
149+
});
150+
});

crux/health/wellness-report.ts

Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ export function buildWellnessReport(checks: CheckResult[]): WellnessReport {
113113
// GitHub issue management
114114
// ─────────────────────────────────────────────────────────────────────────────
115115

116+
export const WELLNESS_ISSUE_TITLE = 'System wellness check failing';
117+
116118
interface GitHubIssue {
117119
number: number;
118120
state: string;
@@ -123,22 +125,82 @@ interface GitHubIssue {
123125
/**
124126
* Find the existing open wellness issue (if any).
125127
* Returns the issue number, or null if none exists.
128+
*
129+
* Uses a two-stage search: first by label (fast, indexed), then by title
130+
* prefix as fallback (catches cases where the label was manually removed).
126131
*/
127132
async function findOpenWellnessIssue(): Promise<number | null> {
128133
try {
129-
const issues = await githubApi<GitHubIssue[]>(
130-
`/repos/${REPO}/issues?labels=wellness&state=open&per_page=1`,
134+
// Primary: search by label
135+
const byLabel = await githubApi<GitHubIssue[]>(
136+
`/repos/${REPO}/issues?labels=wellness&state=open&per_page=5`,
131137
);
132-
if (issues.length > 0) {
133-
return issues[0].number;
138+
if (byLabel.length > 0) {
139+
return byLabel[0].number;
134140
}
141+
142+
// Fallback: search recent open issues by title prefix
143+
const recent = await githubApi<GitHubIssue[]>(
144+
`/repos/${REPO}/issues?state=open&per_page=30&sort=created&direction=desc`,
145+
);
146+
const match = recent.find((i) => i.title.startsWith(WELLNESS_ISSUE_TITLE));
147+
if (match) {
148+
return match.number;
149+
}
150+
135151
return null;
136152
} catch {
137153
// GitHub API failure — don't block the report
138154
return null;
139155
}
140156
}
141157

158+
/**
159+
* Close duplicate wellness issues that were created by concurrent workflow runs.
160+
* Keeps the oldest (lowest number) and closes the rest as duplicates.
161+
*/
162+
async function deduplicateWellnessIssues(): Promise<void> {
163+
try {
164+
// Brief delay to let concurrent creates finish
165+
await new Promise((r) => setTimeout(r, 2000));
166+
167+
const openIssues = await githubApi<GitHubIssue[]>(
168+
`/repos/${REPO}/issues?labels=wellness&state=open&per_page=10`,
169+
);
170+
171+
if (openIssues.length <= 1) return;
172+
173+
// Keep the oldest (lowest number), close the rest
174+
const sorted = [...openIssues].sort((a, b) => a.number - b.number);
175+
const keeper = sorted[0];
176+
177+
for (const issue of sorted.slice(1)) {
178+
try {
179+
await githubApi(`/repos/${REPO}/issues/${issue.number}/comments`, {
180+
method: 'POST',
181+
body: {
182+
body: `Closing as duplicate of #${keeper.number} (created by concurrent wellness check workflow).`,
183+
},
184+
});
185+
await githubApi(`/repos/${REPO}/issues/${issue.number}`, {
186+
method: 'PATCH',
187+
body: { state: 'closed' },
188+
});
189+
console.log(`Closed duplicate wellness issue #${issue.number} (keeping #${keeper.number})`);
190+
} catch (err) {
191+
console.warn(
192+
`Failed to close duplicate #${issue.number}: ${err instanceof Error ? err.message : String(err)}`,
193+
);
194+
}
195+
}
196+
} catch (err) {
197+
// Best-effort dedup — don't fail the workflow over this
198+
console.warn(
199+
`Dedup check failed: ${err instanceof Error ? err.message : String(err)}`,
200+
);
201+
}
202+
}
203+
142204
/**
143205
* Ensure the "wellness" label exists on the repo.
144206
* No-ops if it already exists (409 Conflict).
@@ -192,19 +254,24 @@ export async function manageWellnessIssue(
192254
console.log(`Updated existing wellness issue #${existingIssue}`);
193255
return { action: 'updated', issueNumber: existingIssue };
194256
} else {
195-
// Create new issue
257+
// Create new issue with a stable title (no timestamp) so concurrent
258+
// workflows can find it via findOpenWellnessIssue(). The timestamp
259+
// is already in the issue body.
196260
const created = await githubApi<{ number: number }>(
197261
`/repos/${REPO}/issues`,
198262
{
199263
method: 'POST',
200264
body: {
201-
title: `System wellness check failing (${report.timestamp})`,
265+
title: WELLNESS_ISSUE_TITLE,
202266
body: report.issueBody,
203267
labels: ['wellness', 'bug'],
204268
},
205269
},
206270
);
207271

272+
// Best-effort dedup: close any duplicates from concurrent workflows
273+
await deduplicateWellnessIssues();
274+
208275
console.log(`Created new wellness issue #${created.number}`);
209276
return { action: 'created', issueNumber: created.number };
210277
}

0 commit comments

Comments
 (0)