Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies = [
"docker>=7.1.0",
"fastapi>=0.115.12,<1",
"gunicorn>=23.0.0",
"jinja2>=3.0",
"linkml==1.9.3",
"loguru",
"oaklib>=0.6.6",
Expand Down
3 changes: 2 additions & 1 deletion backend/src/monarch_py/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse

from monarch_py.api import association, entity, histopheno, search, semsim, text_annotation
from monarch_py.api import association, entity, histopheno, meta, search, semsim, text_annotation
from monarch_py.api.config import semsimian, spacyner, settings
from monarch_py.api.middleware.logging_middleware import LoggingMiddleware
from monarch_py.utils.utils import get_release_metadata, get_release_versions
Expand All @@ -28,6 +28,7 @@ async def lifespan(app: FastAPI):
app.include_router(association.router, prefix=f"{PREFIX}/association")
app.include_router(entity.router, prefix=f"{PREFIX}/entity")
app.include_router(histopheno.router, prefix=f"{PREFIX}/histopheno")
app.include_router(meta.router, prefix=PREFIX)
app.include_router(search.router, prefix=PREFIX)
app.include_router(semsim.router, prefix=f"{PREFIX}/semsim")
app.include_router(text_annotation.router, prefix=PREFIX)
Expand Down
89 changes: 89 additions & 0 deletions backend/src/monarch_py/api/meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Meta endpoint for serving HTML with dynamic Open Graph tags to social media crawlers."""

from pathlib import Path

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import HTMLResponse
from jinja2 import Environment, FileSystemLoader, select_autoescape

from monarch_py.api.config import solr

# Router holding the meta endpoint(s); routes are grouped under the "meta" tag.
router = APIRouter(tags=["meta"])

# Character threshold above which the generated description is truncated.
MAX_DESCRIPTION_LENGTH = 300

# Templates are loaded from the "templates" directory that sits next to this module.
TEMPLATES_DIR = Path(__file__).parent / "templates"
# Shared Jinja2 environment. Autoescaping is switched on for html/xml templates
# so that entity names and descriptions are escaped when rendered.
jinja_env = Environment(
loader=FileSystemLoader(TEMPLATES_DIR),
autoescape=select_autoescape(["html", "xml"]),
)


def get_base_url(request: Request) -> str:
    """Derive the public base URL (``scheme://host``) from the request.

    The same stack serves several deployments (e.g. beta and prod), so the
    base URL is taken from the incoming request rather than from config.

    Args:
        request: The incoming request; its ``x-forwarded-proto`` and ``host``
            headers are consulted before falling back to ``request.url``.

    Returns:
        The base URL without a trailing slash, e.g. ``https://example.org``.
    """
    # A chain of proxies can turn X-Forwarded-Proto into a list such as
    # "https, http"; the first entry is the scheme the client actually used.
    forwarded_proto = request.headers.get("x-forwarded-proto", "")
    scheme = forwarded_proto.split(",", 1)[0].strip().lower()
    # An absent, empty or unexpected value must not end up in the URLs we
    # emit, so fall back to the scheme the server itself observed.
    if scheme not in ("http", "https"):
        scheme = request.url.scheme
    host = request.headers.get("host", request.url.netloc)
    return f"{scheme}://{host}"


def get_default_image(request: Request) -> str:
    """Return the absolute URL of the fallback Open Graph image.

    The image path is fixed; only the base URL varies with the request.
    """
    site_root = get_base_url(request)
    return f"{site_root}/share-thumbnail.jpg"


@router.get("/meta/{entity_id:path}", response_class=HTMLResponse, include_in_schema=False)
async def get_meta_page(entity_id: str, request: Request) -> HTMLResponse:
    """
    Render a small HTML page whose Open Graph tags describe one entity.

    Intended for social media crawlers (Slackbot, Twitterbot, etc.) that need
    entity-specific link previews; regular users are expected to receive the
    SPA straight from Nginx instead.

    Args:
        entity_id: The entity identifier (e.g., MONDO:0005148)
        request: The FastAPI request object, used to build absolute URLs

    Returns:
        An HTML response, cacheable for one hour, with entity-specific OG tags

    Raises:
        HTTPException: 404 if no entity exists for ``entity_id``
    """
    node = solr().get_entity(entity_id, extra=False)
    if node is None:
        raise HTTPException(status_code=404, detail=f"Entity not found: {entity_id}")

    page_url = f"{get_base_url(request)}/{entity_id}"
    page_title = f"{node.name or entity_id} | Monarch Initiative"

    # Name first, then description; missing or empty pieces are left out.
    summary = " - ".join(piece for piece in (node.name, node.description) if piece)
    if not summary:
        summary = f"View {entity_id} on Monarch Initiative"

    if len(summary) > MAX_DESCRIPTION_LENGTH:
        # Cut at the limit, then back up to the previous space so the text
        # does not end in the middle of a word.
        clipped = summary[:MAX_DESCRIPTION_LENGTH]
        summary = clipped.rsplit(" ", 1)[0] + "..."

    rendered = jinja_env.get_template("meta.html").render(
        title=page_title,
        description=summary,
        url=page_url,
        image=get_default_image(request),
    )

    return HTMLResponse(
        content=rendered,
        status_code=200,
        headers={"Cache-Control": "public, max-age=3600"},
    )
35 changes: 35 additions & 0 deletions backend/src/monarch_py/api/templates/meta.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />

<!-- basic -->
<title>{{ title }}</title>
<meta name="title" content="{{ title }}" />
<meta name="description" content="{{ description }}" />

<!-- open graph -->
<meta property="og:type" content="website" />
<meta property="og:url" content="{{ url }}" />
<meta property="og:title" content="{{ title }}" />
<meta property="og:description" content="{{ description }}" />
<meta property="og:image" content="{{ image }}" />
<meta property="og:site_name" content="Monarch Initiative" />

<!-- twitter -->
<meta property="twitter:card" content="summary_large_image" />
<meta property="twitter:url" content="{{ url }}" />
<meta property="twitter:title" content="{{ title }}" />
<meta property="twitter:description" content="{{ description }}" />
<meta property="twitter:image" content="{{ image }}" />

<!-- redirect browsers to the real page -->
<meta http-equiv="refresh" content="0;url={{ url }}" />
</head>
<body>
<h1>{{ title }}</h1>
<p>{{ description }}</p>
<p>Redirecting to <a href="{{ url }}">{{ url }}</a>...</p>
</body>
</html>
149 changes: 149 additions & 0 deletions backend/tests/api/test_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Tests for the meta endpoint that serves dynamic OG tags to crawlers."""

from unittest.mock import patch

import pytest
from fastapi.testclient import TestClient

from monarch_py.api.main import app
from monarch_py.datamodels.model import Node


@pytest.fixture
def client():
    """Provide a ``TestClient`` wrapping the API application."""
    test_client = TestClient(app)
    return test_client


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_html_with_og_tags(mock_get_entity, client, node):
    """A known entity yields a 200 HTML page carrying its Open Graph tags."""
    mock_get_entity.return_value = Node(**node)

    resp = client.get("/v3/api/meta/MONDO:0020121")
    assert resp.status_code == 200
    assert resp.headers["content-type"] == "text/html; charset=utf-8"

    body = resp.text
    assert "MONDO:0020121" in body
    assert "muscular dystrophy" in body.lower()
    # Every Open Graph property checked here must appear in the page.
    for og_property in ("og:title", "og:description", "og:url"):
        assert og_property in body
    # The entity URL is built from the request host plus the entity id.
    assert "testserver/MONDO:0020121" in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_404_for_unknown_entity(mock_get_entity, client):
    """A lookup that finds no entity must surface as HTTP 404."""
    # Simulate the backend reporting that the entity does not exist.
    mock_get_entity.return_value = None

    resp = client.get("/v3/api/meta/FAKE:9999999")

    assert resp.status_code == 404


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_escapes_html_in_content(mock_get_entity, client):
    """Markup inside entity fields must be escaped in the page (XSS guard)."""
    hostile_fields = {
        "id": "TEST:001",
        "category": "biolink:Disease",
        "name": '<script>alert("xss")</script>',
        "description": 'A "test" <b>entity</b> with & special chars',
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**hostile_fields)

    resp = client.get("/v3/api/meta/TEST:001")
    assert resp.status_code == 200

    body = resp.text
    # The raw tag must be gone and its escaped form present instead.
    assert "<script>" not in body
    assert "&lt;script&gt;" in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_truncates_long_description(mock_get_entity, client):
    """An over-long description is shortened and marked with an ellipsis."""
    # 200 repetitions of "word " give roughly 1000 characters, far beyond
    # the 300-character limit applied by the endpoint.
    full_text = ("word " * 200).strip()
    mock_get_entity.return_value = Node(
        id="TEST:002",
        category="biolink:Disease",
        name="Test Entity",
        description=full_text,
        provided_by="test",
        association_counts=[],
    )

    resp = client.get("/v3/api/meta/TEST:002")
    assert resp.status_code == 200

    body = resp.text
    assert "..." in body
    # The untruncated description must not survive into the page.
    assert full_text not in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_name(mock_get_entity, client):
    """Without a name, the title falls back to the entity identifier."""
    nameless_fields = {
        "id": "TEST:003",
        "category": "biolink:Disease",
        "name": None,
        "description": "A test description",
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**nameless_fields)

    resp = client.get("/v3/api/meta/TEST:003")
    assert resp.status_code == 200

    body = resp.text
    assert "TEST:003 | Monarch Initiative" in body
    assert "A test description" in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_description(mock_get_entity, client):
    """A missing description must not prevent the OG tags from rendering."""
    undescribed_fields = {
        "id": "TEST:004",
        "category": "biolink:Disease",
        "name": "Test Entity",
        "description": None,
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**undescribed_fields)

    resp = client.get("/v3/api/meta/TEST:004")
    assert resp.status_code == 200

    body = resp.text
    assert "Test Entity | Monarch Initiative" in body
    assert "og:description" in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_name_or_description(mock_get_entity, client):
    """With no name and no description, generic fallback text is used."""
    bare_fields = {
        "id": "TEST:005",
        "category": "biolink:Disease",
        "name": None,
        "description": None,
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**bare_fields)

    resp = client.get("/v3/api/meta/TEST:005")
    assert resp.status_code == 200

    body = resp.text
    # Title falls back to the id; description falls back to a generic phrase.
    assert "TEST:005 | Monarch Initiative" in body
    assert "View TEST:005 on Monarch Initiative" in body


@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_cache_control_header(mock_get_entity, client, node):
    """A successful response advertises one hour of public caching."""
    mock_get_entity.return_value = Node(**node)

    resp = client.get("/v3/api/meta/MONDO:0020121")

    assert resp.status_code == 200
    cache_policy = resp.headers["cache-control"]
    assert cache_policy == "public, max-age=3600"
2 changes: 2 additions & 0 deletions backend/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading