Skip to content

Commit abcf62f

Browse files
Release 0.6.7: Fix sync status page (#214)
* Add OG meta tags to demo and dashboard pages
  Add Open Graph and Twitter Card meta tags so sharing links on Slack, Discord, Twitter, etc. shows a proper preview card with the OSA branding image from osc.earth.
* Add OSA logo, dark mode, and branded footer
  - Inline SVG OSA logo in header (uses currentColor for dark mode compatibility)
  - Dark mode CSS via prefers-color-scheme on demo page
  - Footer with dynamic year, heart, and OSC link
  - Consistent deep blue color palette across both pages
* Reorder footer: copyright holder first, then made with love
* Fix review issues: dark mode, meta tags, broken links
  - Add dark mode to dashboard (matching frontend theme)
  - Add <meta name="description"> to both pages for SEO
  - Remove redundant twitter:title/description/image tags
  - Fix broken GitHub links (osc-em -> OpenScience-Collective)
  - Standardize org name to "OpenScience Collective"
  - Move dashboard inline styles to CSS classes
  - Rename .footer to .site-footer for consistency
* Enforce mandatory HED tag validation before display (#211)
* Enforce mandatory HED tag validation before display
  Fixes #210
* Address review: consistent error handling, sanitize exceptions
* Fix sync status page: community-aware, all sync types, N/A display (#213)
* Fix sync status page: N/A display, all sync types, community-aware
  Fixes #212
  - Fix Papers Sync showing N/A: sync_metadata stores source names like "openalex:query", not "openalex", so use prefix matching to find the most recent timestamp for each source type
  - Make /sync/status community-aware: accept community_id query param and query the correct community database (previously always used hed default)
  - Add all sync types to response: new 'syncs' field includes github, papers, docstrings, mailman, beps, faq with last_sync and next_run
  - Dashboard: pass community_id to sync status fetch, render all sync types dynamically including next scheduled run time
* Address review findings: validation, future times, health endpoint
  - Validate community_id against registry (404 for unknown)
  - Update /sync/health to accept community_id param
  - Fix formatRelativeTime to handle future timestamps (next_run)
  - Use _parse_iso_datetime in _get_most_recent_sync for robust comparison
  - Fix misleading citing_doi comment in papers source lookup
  - Add exc_info=True to warning log calls for full tracebacks
  - Add tests: prefix matching regression, syncs field, unknown community 404
* Fix test: add isolated_db fixture to community_id test
* Bump version to 0.6.7.dev0
* Address PR review findings
  - Fix papers last_sync timestamp comparison: use _parse_iso_datetime instead of string max() to avoid wrong result with mixed UTC offsets
  - Fix dashboard /sync/health URL: pass community_id param so health badge reflects the viewed community, not always 'hed'
  - Add tests for /sync/health community_id param: 404 for unknown, 200 for known community
* Fix logging: add exc_info and upgrade scheduler warning to error
  - Add exc_info=True to trigger_sync error log for full tracebacks
  - Upgrade scheduler job inspection failure from warning to error
1 parent ee3ee37 commit abcf62f

File tree

6 files changed

+303
-71
lines changed

6 files changed

+303
-71
lines changed

dashboard/osa/index.html

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -762,8 +762,8 @@ <h2>Communities</h2>
762762
const [summaryResp, usageResp, syncResp, healthResp] = await Promise.all([
763763
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public`),
764764
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public/usage?period=${activePeriod}`),
765-
fetch(`${API_BASE}/sync/status`).catch(err => { console.warn('Sync status fetch failed (non-critical):', err.message); return null; }),
766-
fetch(`${API_BASE}/sync/health`).catch(err => { console.warn('Health check fetch failed (non-critical):', err.message); return null; }),
765+
fetch(`${API_BASE}/sync/status?community_id=${encodeURIComponent(communityId)}`).catch(err => { console.warn('Sync status fetch failed (non-critical):', err.message); return null; }),
766+
fetch(`${API_BASE}/sync/health?community_id=${encodeURIComponent(communityId)}`).catch(err => { console.warn('Health check fetch failed (non-critical):', err.message); return null; }),
767767
]);
768768

769769
const failedStatus = !summaryResp.ok ? summaryResp.status : (!usageResp.ok ? usageResp.status : null);
@@ -864,11 +864,44 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
864864
renderToolsChart(summary.top_tools);
865865
}
866866

867+
// Human-readable labels for the sync types shown on the dashboard, keyed by
// the sync-type name used in the `syncs` object of the /sync/status response.
// renderSyncInfo falls back to the raw key when a type has no entry here.
const SYNC_LABELS = {
868+
github: 'GitHub Sync',
869+
papers: 'Papers Sync',
870+
docstrings: 'Docstrings Sync',
871+
mailman: 'Mailing List Sync',
872+
beps: 'BEPs Sync',
873+
faq: 'FAQ Sync',
874+
};
875+
876+
// Build the display value for one sync entry ({ last_sync, next_run }).
//
// Returns:
//   - 'N/A' when the entry is missing or has neither timestamp,
//   - the relative last-sync time, optionally followed by a muted
//     "(next: ...)" span, when last_sync is set,
//   - 'Pending' plus the "(next: ...)" span when only next_run is set.
//
// NOTE(review): the return value is an HTML fragment (it contains a <span>),
// and renderSyncInfo interpolates it without escapeHtml. The text inside comes
// from formatRelativeTime, whose error fallback returns the raw timestamp
// string - confirm that string can never carry markup.
function renderSyncValue(item) {
877+
if (!item) return 'N/A';
878+
if (item.last_sync) {
879+
let val = formatRelativeTime(item.last_sync);
880+
if (item.next_run) val += ` <span style="color:#94a3b8;font-size:0.78rem;">(next: ${formatRelativeTime(item.next_run)})</span>`;
881+
return val;
882+
}
883+
// Never synced yet, but a run is scheduled.
if (item.next_run) return `Pending <span style="color:#94a3b8;font-size:0.78rem;">(next: ${formatRelativeTime(item.next_run)})</span>`;
884+
return 'N/A';
885+
}
886+
867887
function renderSyncInfo(sync) {
868888
if (!sync) return '';
889+
890+
// Use new syncs dict if available (community-aware, all sync types)
891+
if (sync.syncs && Object.keys(sync.syncs).length > 0) {
892+
const items = Object.entries(sync.syncs).map(([type, item]) => {
893+
const label = SYNC_LABELS[type] || type;
894+
return `<div class="sync-item">
895+
<div class="sync-item-label">${escapeHtml(label)}</div>
896+
<div class="sync-item-value">${renderSyncValue(item)}</div>
897+
</div>`;
898+
}).join('');
899+
return `<div class="sync-info">${items}</div>`;
900+
}
901+
902+
// Fallback to old format (github.repos / papers.sources)
869903
let lastGithub = 'N/A';
870904
let lastPapers = 'N/A';
871-
872905
if (sync.github && sync.github.repos) {
873906
const times = Object.values(sync.github.repos).map(r => r.last_sync).filter(Boolean);
874907
if (times.length > 0) lastGithub = formatRelativeTime(times.sort().reverse()[0]);
@@ -877,7 +910,6 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
877910
const times = Object.values(sync.papers.sources).map(s => s.last_sync).filter(Boolean);
878911
if (times.length > 0) lastPapers = formatRelativeTime(times.sort().reverse()[0]);
879912
}
880-
881913
return `
882914
<div class="sync-info">
883915
<div class="sync-item">
@@ -894,11 +926,13 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
894926
function formatRelativeTime(isoStr) {
895927
try {
896928
const diffMs = new Date() - new Date(isoStr);
897-
const diffHrs = Math.floor(diffMs / 3600000);
898-
if (diffHrs < 1) return 'Less than 1 hour ago';
899-
if (diffHrs < 24) return `${diffHrs} hour${diffHrs === 1 ? '' : 's'} ago`;
900-
const diffDays = Math.floor(diffHrs / 24);
901-
return `${diffDays} day${diffDays === 1 ? '' : 's'} ago`;
929+
const future = diffMs < 0;
930+
const absDiffHrs = Math.floor(Math.abs(diffMs) / 3600000);
931+
const suffix = future ? 'from now' : 'ago';
932+
if (absDiffHrs < 1) return future ? 'Less than 1 hour from now' : 'Less than 1 hour ago';
933+
if (absDiffHrs < 24) return `${absDiffHrs} hour${absDiffHrs === 1 ? '' : 's'} ${suffix}`;
934+
const absDiffDays = Math.floor(absDiffHrs / 24);
935+
return `${absDiffDays} day${absDiffDays === 1 ? '' : 's'} ${suffix}`;
902936
} catch (err) { console.warn('Failed to parse timestamp:', isoStr, err); return isoStr || 'N/A'; }
903937
}
904938

src/api/routers/sync.py

Lines changed: 112 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
from datetime import UTC, datetime
1111
from typing import Any
1212

13-
from fastapi import APIRouter, HTTPException
13+
from fastapi import APIRouter, HTTPException, Query
1414
from pydantic import BaseModel
1515

1616
from src.api.config import get_settings
1717
from src.api.scheduler import get_scheduler, run_sync_now
1818
from src.api.security import RequireAdminAuth
19+
from src.assistants import registry
1920
from src.knowledge.db import get_connection, get_stats
2021

2122
logger = logging.getLogger(__name__)
@@ -66,13 +67,24 @@ class HealthStatus(BaseModel):
6667
papers_age_hours: float | None
6768

6869

70+
# One entry of SyncStatusResponse.syncs. In get_sync_status, last_sync is the
# raw stored timestamp string chosen by _get_most_recent_sync and next_run is
# the scheduler job's next_run_time.isoformat() (or None when the job has none).
class SyncItemStatus(BaseModel):
71+
"""Status for a single sync type."""
72+
73+
last_sync: str | None
74+
"""ISO timestamp of the most recent successful sync, or None if never synced."""
75+
next_run: str | None
76+
"""ISO timestamp of the next scheduled run, or None if not scheduled."""
77+
78+
6979
class SyncStatusResponse(BaseModel):
7080
"""Complete sync status response."""
7181

7282
github: GitHubStatus
7383
papers: PapersStatus
7484
scheduler: SchedulerStatus
7585
health: HealthStatus
86+
# Defaults to empty, so responses built without this field still validate.
# get_sync_status only adds a key when that sync type has a last_sync or a
# scheduled next_run, so any of the six types may be absent.
syncs: dict[str, SyncItemStatus] = {}
87+
"""Per-sync-type status: github, papers, docstrings, mailman, beps, faq."""
7688

7789

7890
class TriggerRequest(BaseModel):
@@ -89,44 +101,49 @@ class TriggerResponse(BaseModel):
89101
items_synced: dict[str, int]
90102

91103

92-
def _get_sync_metadata() -> dict[str, Any]:
93-
"""Get all sync metadata from database."""
94-
metadata: dict[str, Any] = {"github": {}, "papers": {}}
104+
def _get_sync_metadata(project: str = "hed") -> dict[str, Any]:
105+
"""Get all sync metadata from the community database.
106+
107+
Returns a dict keyed by source_type (github, papers, beps, docstrings,
108+
mailman, faq), each containing a dict of source_name -> metadata.
109+
"""
110+
metadata: dict[str, Any] = {}
95111

96112
try:
97-
with get_connection() as conn:
113+
with get_connection(project) as conn:
98114
rows = conn.execute(
99115
"SELECT source_type, source_name, last_sync_at, items_synced FROM sync_metadata"
100116
).fetchall()
101117

102118
for row in rows:
103119
source_type = row["source_type"]
104120
source_name = row["source_name"]
105-
if source_type in metadata:
106-
metadata[source_type][source_name] = {
107-
"last_sync": row["last_sync_at"],
108-
"items_synced": row["items_synced"],
109-
}
121+
if source_type not in metadata:
122+
metadata[source_type] = {}
123+
metadata[source_type][source_name] = {
124+
"last_sync": row["last_sync_at"],
125+
"items_synced": row["items_synced"],
126+
}
110127
except Exception as e:
111-
logger.warning("Failed to get sync metadata: %s", e)
128+
logger.warning("Failed to get sync metadata for %s: %s", project, e, exc_info=True)
112129

113130
return metadata
114131

115132

116-
def _get_repo_counts() -> dict[str, int]:
117-
"""Get item counts per repository."""
133+
def _get_repo_counts(project: str = "hed") -> dict[str, int]:
134+
"""Get item counts per repository for a community."""
118135
counts: dict[str, int] = {}
119136

120137
try:
121-
with get_connection() as conn:
138+
with get_connection(project) as conn:
122139
rows = conn.execute(
123140
"SELECT repo, COUNT(*) as count FROM github_items GROUP BY repo"
124141
).fetchall()
125142

126143
for row in rows:
127144
counts[row["repo"]] = row["count"]
128145
except Exception as e:
129-
logger.warning("Failed to get repo counts: %s", e)
146+
logger.warning("Failed to get repo counts for %s: %s", project, e, exc_info=True)
130147

131148
return counts
132149

@@ -187,20 +204,53 @@ def _calculate_health(metadata: dict[str, Any]) -> HealthStatus:
187204
)
188205

189206

207+
def _get_most_recent_sync(metadata: dict[str, Any], source_type: str) -> str | None:
208+
"""Return the most recent last_sync_at timestamp for a given source_type.
209+
210+
Parses timestamps via _parse_iso_datetime for correct temporal comparison
211+
rather than relying on lexicographic string ordering.
212+
"""
213+
entries = metadata.get(source_type, {})
214+
parsed: list[tuple[datetime, str]] = []
215+
for v in entries.values():
216+
raw = v.get("last_sync")
217+
if not raw:
218+
continue
219+
dt = _parse_iso_datetime(raw)
220+
if dt is not None:
221+
parsed.append((dt, raw))
222+
return max(parsed, key=lambda x: x[0])[1] if parsed else None
223+
224+
190225
@router.get("/status", response_model=SyncStatusResponse)
191-
async def get_sync_status() -> SyncStatusResponse:
192-
"""Get comprehensive sync status.
226+
async def get_sync_status(
227+
community_id: str | None = Query(default=None),
228+
) -> SyncStatusResponse:
229+
"""Get comprehensive sync status for a community.
230+
231+
Args:
232+
community_id: Community to query. Defaults to 'hed' if not specified.
193233
194234
Returns status of all knowledge sync jobs including:
195235
- GitHub issues/PRs counts and last sync times per repo
196236
- Papers counts and last sync times per source
237+
- All sync types (github, papers, docstrings, mailman, beps, faq) with
238+
last_sync and next_run timestamps
197239
- Scheduler status and next run times
198240
- Health check based on sync ages
199241
"""
242+
project = community_id or "hed"
243+
244+
if community_id is not None and registry.get(community_id) is None:
245+
raise HTTPException(
246+
status_code=404,
247+
detail=f"Community '{community_id}' not found.",
248+
)
249+
200250
settings = get_settings()
201-
stats = get_stats()
202-
metadata = _get_sync_metadata()
203-
repo_counts = _get_repo_counts()
251+
stats = get_stats(project)
252+
metadata = _get_sync_metadata(project)
253+
repo_counts = _get_repo_counts(project)
204254

205255
# Build GitHub repos status
206256
github_repos: dict[str, RepoStatus] = {}
@@ -211,13 +261,23 @@ async def get_sync_status() -> SyncStatusResponse:
211261
last_sync=repo_meta.get("last_sync"),
212262
)
213263

214-
# Build papers sources status
264+
# Build papers sources status using prefix matching.
265+
# Stored names are like "openalex:query", "semanticscholar:query", "pubmed:query".
266+
# "citing_{doi}" entries track citation lookups; they are not included here.
215267
papers_sources: dict[str, RepoStatus] = {}
216268
for source in ["openalex", "semanticscholar", "pubmed"]:
217-
source_meta = metadata.get("papers", {}).get(source, {})
269+
matching = {
270+
k: v for k, v in metadata.get("papers", {}).items() if k.startswith(f"{source}:")
271+
}
272+
parsed = [
273+
(dt, raw)
274+
for v in matching.values()
275+
if (raw := v.get("last_sync")) and (dt := _parse_iso_datetime(raw))
276+
]
277+
last_sync = max(parsed, key=lambda x: x[0])[1] if parsed else None
218278
papers_sources[source] = RepoStatus(
219279
items=stats.get(f"papers_{source}", 0),
220-
last_sync=source_meta.get("last_sync"),
280+
last_sync=last_sync,
221281
)
222282

223283
# Get scheduler info
@@ -230,7 +290,17 @@ async def get_sync_status() -> SyncStatusResponse:
230290
next_run = job.next_run_time.isoformat() if job.next_run_time else None
231291
jobs[job.id] = next_run
232292
except Exception as e:
233-
logger.warning("Failed to get next run times: %s", e)
293+
logger.error("Failed to get next run times: %s", e, exc_info=True)
294+
295+
# Build per-sync-type status for all known sync types
296+
all_sync_types = ("github", "papers", "docstrings", "mailman", "beps", "faq")
297+
syncs: dict[str, SyncItemStatus] = {}
298+
for sync_type in all_sync_types:
299+
last_sync = _get_most_recent_sync(metadata, sync_type)
300+
next_run = jobs.get(f"{sync_type}_{project}")
301+
# Include if there is any data or a scheduled next run
302+
if last_sync is not None or next_run is not None:
303+
syncs[sync_type] = SyncItemStatus(last_sync=last_sync, next_run=next_run)
234304

235305
return SyncStatusResponse(
236306
github=GitHubStatus(
@@ -250,6 +320,7 @@ async def get_sync_status() -> SyncStatusResponse:
250320
jobs=jobs,
251321
),
252322
health=_calculate_health(metadata),
323+
syncs=syncs,
253324
)
254325

255326

@@ -284,19 +355,32 @@ async def trigger_sync(
284355
items_synced=results,
285356
)
286357
except Exception as e:
287-
logger.error("Sync trigger failed: %s", e)
358+
logger.error("Sync trigger failed: %s", e, exc_info=True)
288359
raise HTTPException(status_code=500, detail=f"Sync failed: {e}") from e
289360

290361

291362
@router.get("/health")
292-
async def health_check() -> dict[str, Any]:
363+
async def health_check(
364+
community_id: str | None = Query(default=None),
365+
) -> dict[str, Any]:
293366
"""Simple health check endpoint for monitoring.
294367
368+
Args:
369+
community_id: Community to check. Defaults to 'hed' if not specified.
370+
295371
Returns a simple status suitable for uptime monitors.
296372
Returns 200 if healthy, 503 if unhealthy.
297373
"""
298-
stats = get_stats()
299-
metadata = _get_sync_metadata()
374+
project = community_id or "hed"
375+
376+
if community_id is not None and registry.get(community_id) is None:
377+
raise HTTPException(
378+
status_code=404,
379+
detail=f"Community '{community_id}' not found.",
380+
)
381+
382+
stats = get_stats(project)
383+
metadata = _get_sync_metadata(project)
300384
health = _calculate_health(metadata)
301385

302386
response = {

0 commit comments

Comments
 (0)