Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 42 additions & 8 deletions dashboard/osa/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,7 @@ <h2>Communities</h2>
const [summaryResp, usageResp, syncResp, healthResp] = await Promise.all([
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public`),
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public/usage?period=${activePeriod}`),
fetch(`${API_BASE}/sync/status`).catch(err => { console.warn('Sync status fetch failed (non-critical):', err.message); return null; }),
fetch(`${API_BASE}/sync/status?community_id=${encodeURIComponent(communityId)}`).catch(err => { console.warn('Sync status fetch failed (non-critical):', err.message); return null; }),
fetch(`${API_BASE}/sync/health`).catch(err => { console.warn('Health check fetch failed (non-critical):', err.message); return null; }),
]);

Expand Down Expand Up @@ -864,11 +864,44 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
renderToolsChart(summary.top_tools);
}

// Human-readable labels for the per-sync-type keys in the /sync/status
// response's `syncs` dict. Keys not listed here fall back to the raw
// type name (see the `SYNC_LABELS[type] || type` lookup in renderSyncInfo).
const SYNC_LABELS = {
  github: 'GitHub Sync',
  papers: 'Papers Sync',
  docstrings: 'Docstrings Sync',
  mailman: 'Mailing List Sync',
  beps: 'BEPs Sync',
  faq: 'FAQ Sync',
};

// Render the display value for one sync item.
// Prefers the last-sync relative time, appending the next scheduled run in
// muted small text when known; shows 'Pending (next: …)' when only a next
// run exists, and 'N/A' when there is no information at all.
function renderSyncValue(item) {
  if (!item) return 'N/A';
  const nextSpan = item.next_run
    ? ` <span style="color:#94a3b8;font-size:0.78rem;">(next: ${formatRelativeTime(item.next_run)})</span>`
    : '';
  if (item.last_sync) return formatRelativeTime(item.last_sync) + nextSpan;
  if (item.next_run) return `Pending${nextSpan}`;
  return 'N/A';
}

function renderSyncInfo(sync) {
if (!sync) return '';

// Use new syncs dict if available (community-aware, all sync types)
if (sync.syncs && Object.keys(sync.syncs).length > 0) {
const items = Object.entries(sync.syncs).map(([type, item]) => {
const label = SYNC_LABELS[type] || type;
return `<div class="sync-item">
<div class="sync-item-label">${escapeHtml(label)}</div>
<div class="sync-item-value">${renderSyncValue(item)}</div>
</div>`;
}).join('');
return `<div class="sync-info">${items}</div>`;
}

// Fallback to old format (github.repos / papers.sources)
let lastGithub = 'N/A';
let lastPapers = 'N/A';

if (sync.github && sync.github.repos) {
const times = Object.values(sync.github.repos).map(r => r.last_sync).filter(Boolean);
if (times.length > 0) lastGithub = formatRelativeTime(times.sort().reverse()[0]);
Expand All @@ -877,7 +910,6 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
const times = Object.values(sync.papers.sources).map(s => s.last_sync).filter(Boolean);
if (times.length > 0) lastPapers = formatRelativeTime(times.sort().reverse()[0]);
}

return `
<div class="sync-info">
<div class="sync-item">
Expand All @@ -894,11 +926,13 @@ <h3 style="color:#1e293b;margin-bottom:0.5rem;font-size:0.9rem;font-weight:600;"
// Format an ISO timestamp as a coarse human-readable relative time,
// handling both past ("2 hours ago") and future ("3 days from now") values.
// Falls back to the raw input (or 'N/A') when the timestamp cannot be parsed.
// NOTE: `new Date(bad)` produces an Invalid Date (NaN) rather than throwing,
// so an explicit NaN check is required — without it, unparseable input
// silently rendered as "NaN days ago" instead of reaching the catch below.
function formatRelativeTime(isoStr) {
  try {
    const diffMs = new Date() - new Date(isoStr);
    if (Number.isNaN(diffMs)) return isoStr || 'N/A';
    const future = diffMs < 0;
    const absDiffHrs = Math.floor(Math.abs(diffMs) / 3600000);
    const suffix = future ? 'from now' : 'ago';
    if (absDiffHrs < 1) return future ? 'Less than 1 hour from now' : 'Less than 1 hour ago';
    if (absDiffHrs < 24) return `${absDiffHrs} hour${absDiffHrs === 1 ? '' : 's'} ${suffix}`;
    const absDiffDays = Math.floor(absDiffHrs / 24);
    return `${absDiffDays} day${absDiffDays === 1 ? '' : 's'} ${suffix}`;
  } catch (err) { console.warn('Failed to parse timestamp:', isoStr, err); return isoStr || 'N/A'; }
}

Expand Down
134 changes: 107 additions & 27 deletions src/api/routers/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
from datetime import UTC, datetime
from typing import Any

from fastapi import APIRouter, HTTPException
from fastapi import APIRouter, HTTPException, Query
from pydantic import BaseModel

from src.api.config import get_settings
from src.api.scheduler import get_scheduler, run_sync_now
from src.api.security import RequireAdminAuth
from src.assistants import registry
from src.knowledge.db import get_connection, get_stats

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -66,13 +67,24 @@ class HealthStatus(BaseModel):
papers_age_hours: float | None


class SyncItemStatus(BaseModel):
    """Status for a single sync type.

    Value type of ``SyncStatusResponse.syncs``, keyed by sync type name
    (e.g. github, papers, docstrings, mailman, beps, faq).
    """

    last_sync: str | None
    """ISO timestamp of the most recent successful sync, or None if never synced."""
    next_run: str | None
    """ISO timestamp of the next scheduled run, or None if not scheduled."""


class SyncStatusResponse(BaseModel):
    """Complete sync status response for the sync status endpoint."""

    github: GitHubStatus
    papers: PapersStatus
    scheduler: SchedulerStatus
    health: HealthStatus
    # Mutable default is safe here: pydantic copies field defaults per instance.
    syncs: dict[str, SyncItemStatus] = {}
    """Per-sync-type status: github, papers, docstrings, mailman, beps, faq."""


class TriggerRequest(BaseModel):
Expand All @@ -89,44 +101,49 @@ class TriggerResponse(BaseModel):
items_synced: dict[str, int]


def _get_sync_metadata() -> dict[str, Any]:
"""Get all sync metadata from database."""
metadata: dict[str, Any] = {"github": {}, "papers": {}}
def _get_sync_metadata(project: str = "hed") -> dict[str, Any]:
    """Load all sync metadata from the community database.

    Args:
        project: Community/project key passed to ``get_connection``.

    Returns:
        Mapping of source_type (github, papers, beps, docstrings, mailman,
        faq) to a dict of source_name -> {"last_sync", "items_synced"}.
        An empty dict is returned (and a warning logged) if the query fails,
        so status endpoints degrade gracefully instead of erroring.
    """
    result: dict[str, Any] = {}

    try:
        with get_connection(project) as conn:
            rows = conn.execute(
                "SELECT source_type, source_name, last_sync_at, items_synced FROM sync_metadata"
            ).fetchall()

            for row in rows:
                per_source = result.setdefault(row["source_type"], {})
                per_source[row["source_name"]] = {
                    "last_sync": row["last_sync_at"],
                    "items_synced": row["items_synced"],
                }
    except Exception as exc:
        logger.warning("Failed to get sync metadata for %s: %s", project, exc, exc_info=True)

    return result


def _get_repo_counts() -> dict[str, int]:
"""Get item counts per repository."""
def _get_repo_counts(project: str = "hed") -> dict[str, int]:
    """Count stored GitHub items per repository for a community.

    Args:
        project: Community/project key passed to ``get_connection``.

    Returns:
        Mapping of repo name -> item count. Empty (with a warning logged)
        if the query fails, so callers never see an exception.
    """
    try:
        with get_connection(project) as conn:
            rows = conn.execute(
                "SELECT repo, COUNT(*) as count FROM github_items GROUP BY repo"
            ).fetchall()
        return {row["repo"]: row["count"] for row in rows}
    except Exception as exc:
        logger.warning("Failed to get repo counts for %s: %s", project, exc, exc_info=True)
        return {}

Expand Down Expand Up @@ -187,20 +204,53 @@ def _calculate_health(metadata: dict[str, Any]) -> HealthStatus:
)


def _get_most_recent_sync(metadata: dict[str, Any], source_type: str) -> str | None:
    """Return the most recent last_sync_at timestamp for a given source_type.

    Timestamps are compared as datetimes (via ``_parse_iso_datetime``) rather
    than lexicographically, so mixed ISO formats order correctly. Entries with
    a missing or unparseable timestamp are skipped; the original raw string of
    the winner is returned, or None when nothing qualifies.
    """
    best_dt = None
    best_raw: str | None = None
    for entry in metadata.get(source_type, {}).values():
        raw = entry.get("last_sync")
        if not raw:
            continue
        dt = _parse_iso_datetime(raw)
        if dt is None:
            continue
        if best_dt is None or dt > best_dt:
            best_dt, best_raw = dt, raw
    return best_raw


@router.get("/status", response_model=SyncStatusResponse)
async def get_sync_status() -> SyncStatusResponse:
"""Get comprehensive sync status.
async def get_sync_status(
community_id: str | None = Query(default=None),
) -> SyncStatusResponse:
"""Get comprehensive sync status for a community.

Args:
community_id: Community to query. Defaults to 'hed' if not specified.

Returns status of all knowledge sync jobs including:
- GitHub issues/PRs counts and last sync times per repo
- Papers counts and last sync times per source
- All sync types (github, papers, docstrings, mailman, beps, faq) with
last_sync and next_run timestamps
- Scheduler status and next run times
- Health check based on sync ages
"""
project = community_id or "hed"

if community_id is not None and registry.get(community_id) is None:
raise HTTPException(
status_code=404,
detail=f"Community '{community_id}' not found.",
)

settings = get_settings()
stats = get_stats()
metadata = _get_sync_metadata()
repo_counts = _get_repo_counts()
stats = get_stats(project)
metadata = _get_sync_metadata(project)
repo_counts = _get_repo_counts(project)

# Build GitHub repos status
github_repos: dict[str, RepoStatus] = {}
Expand All @@ -211,13 +261,19 @@ async def get_sync_status() -> SyncStatusResponse:
last_sync=repo_meta.get("last_sync"),
)

# Build papers sources status
# Build papers sources status using prefix matching.
# Stored names are like "openalex:query", "semanticscholar:query", "pubmed:query".
# "citing_{doi}" entries track citation lookups; they are not included here.
papers_sources: dict[str, RepoStatus] = {}
for source in ["openalex", "semanticscholar", "pubmed"]:
source_meta = metadata.get("papers", {}).get(source, {})
matching = {
k: v for k, v in metadata.get("papers", {}).items() if k.startswith(f"{source}:")
}
timestamps = [v["last_sync"] for v in matching.values() if v.get("last_sync")]
last_sync = max(timestamps) if timestamps else None
papers_sources[source] = RepoStatus(
items=stats.get(f"papers_{source}", 0),
last_sync=source_meta.get("last_sync"),
last_sync=last_sync,
)

# Get scheduler info
Expand All @@ -230,7 +286,17 @@ async def get_sync_status() -> SyncStatusResponse:
next_run = job.next_run_time.isoformat() if job.next_run_time else None
jobs[job.id] = next_run
except Exception as e:
logger.warning("Failed to get next run times: %s", e)
logger.warning("Failed to get next run times: %s", e, exc_info=True)

# Build per-sync-type status for all known sync types
all_sync_types = ("github", "papers", "docstrings", "mailman", "beps", "faq")
syncs: dict[str, SyncItemStatus] = {}
for sync_type in all_sync_types:
last_sync = _get_most_recent_sync(metadata, sync_type)
next_run = jobs.get(f"{sync_type}_{project}")
# Include if there is any data or a scheduled next run
if last_sync is not None or next_run is not None:
syncs[sync_type] = SyncItemStatus(last_sync=last_sync, next_run=next_run)

return SyncStatusResponse(
github=GitHubStatus(
Expand All @@ -250,6 +316,7 @@ async def get_sync_status() -> SyncStatusResponse:
jobs=jobs,
),
health=_calculate_health(metadata),
syncs=syncs,
)


Expand Down Expand Up @@ -289,14 +356,27 @@ async def trigger_sync(


@router.get("/health")
async def health_check() -> dict[str, Any]:
async def health_check(
community_id: str | None = Query(default=None),
) -> dict[str, Any]:
"""Simple health check endpoint for monitoring.

Args:
community_id: Community to check. Defaults to 'hed' if not specified.

Returns a simple status suitable for uptime monitors.
Returns 200 if healthy, 503 if unhealthy.
"""
stats = get_stats()
metadata = _get_sync_metadata()
project = community_id or "hed"

if community_id is not None and registry.get(community_id) is None:
raise HTTPException(
status_code=404,
detail=f"Community '{community_id}' not found.",
)

stats = get_stats(project)
metadata = _get_sync_metadata(project)
health = _calculate_health(metadata)

response = {
Expand Down
Loading