Skip to content

Commit 3453d6a

Browse files
authored
Merge pull request #1251 from monarch-initiative/issue-1242-link-previews
feat: Dynamic Open Graph meta tags for social media link previews
2 parents b819149 + bfadb8a commit 3453d6a

File tree

10 files changed

+657
-1
lines changed

10 files changed

+657
-1
lines changed

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ dependencies = [
2020
"docker>=7.1.0",
2121
"fastapi>=0.115.12,<1",
2222
"gunicorn>=23.0.0",
23+
"jinja2>=3.0",
2324
"linkml==1.9.3",
2425
"loguru",
2526
"oaklib>=0.6.6",

backend/src/monarch_py/api/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from fastapi.middleware.cors import CORSMiddleware
55
from fastapi.responses import RedirectResponse
66

7-
from monarch_py.api import association, entity, histopheno, search, semsim, text_annotation
7+
from monarch_py.api import association, entity, histopheno, meta, search, semsim, text_annotation
88
from monarch_py.api.config import semsimian, spacyner, settings
99
from monarch_py.api.middleware.logging_middleware import LoggingMiddleware
1010
from monarch_py.utils.utils import get_release_metadata, get_release_versions
@@ -28,6 +28,7 @@ async def lifespan(app: FastAPI):
2828
app.include_router(association.router, prefix=f"{PREFIX}/association")
2929
app.include_router(entity.router, prefix=f"{PREFIX}/entity")
3030
app.include_router(histopheno.router, prefix=f"{PREFIX}/histopheno")
31+
app.include_router(meta.router, prefix=PREFIX)
3132
app.include_router(search.router, prefix=PREFIX)
3233
app.include_router(semsim.router, prefix=f"{PREFIX}/semsim")
3334
app.include_router(text_annotation.router, prefix=PREFIX)

backend/src/monarch_py/api/meta.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""Meta endpoint for serving HTML with dynamic Open Graph tags to social media crawlers."""
2+
3+
from pathlib import Path
4+
5+
from fastapi import APIRouter, HTTPException, Request
6+
from fastapi.responses import HTMLResponse
7+
from jinja2 import Environment, FileSystemLoader, select_autoescape
8+
9+
from monarch_py.api.config import solr
10+
11+
router = APIRouter(tags=["meta"])
12+
13+
MAX_DESCRIPTION_LENGTH = 300
14+
15+
TEMPLATES_DIR = Path(__file__).parent / "templates"
16+
jinja_env = Environment(
17+
loader=FileSystemLoader(TEMPLATES_DIR),
18+
autoescape=select_autoescape(["html", "xml"]),
19+
)
20+
21+
22+
def get_base_url(request: Request) -> str:
    """Derive the public base URL (scheme + host) from the request headers.

    The scheme comes from ``X-Forwarded-Proto`` when a proxy supplies it and
    falls back to the scheme of the request itself. The host comes from the
    ``Host`` header, falling back to the request URL's netloc. This lets the
    same stack serve several deployments (e.g. beta and prod).

    Args:
        request: The incoming request.

    Returns:
        A string of the form ``"<scheme>://<host>"`` with no trailing slash.
    """
    # A chain of proxies may send a comma-separated list ("https, http");
    # the first entry is the protocol used by the original client.
    forwarded_proto = request.headers.get("x-forwarded-proto", "")
    # An absent or blank header falls back to the scheme the app itself saw.
    scheme = forwarded_proto.split(",", 1)[0].strip() or request.url.scheme
    host = request.headers.get("host", request.url.netloc)
    return f"{scheme}://{host}"
27+
28+
29+
def get_default_image(request: Request) -> str:
    """Return the absolute URL of the fallback Open Graph thumbnail image."""
    site_root = get_base_url(request)
    return f"{site_root}/share-thumbnail.jpg"
32+
33+
34+
def _truncate_description(text: str, limit: int) -> str:
    """Shorten *text* to at most *limit* characters, cutting at a word boundary.

    Text that already fits is returned unchanged. Otherwise the text is cut
    before the last word that does not fit and ``"..."`` is appended; the
    ellipsis is counted against *limit* (for ``limit >= 3``).
    """
    if len(text) <= limit:
        return text

    ellipsis = "..."
    budget = max(limit - len(ellipsis), 0)
    # Look one character past the budget: if that character is a space, the
    # last word inside the budget is complete and must not be discarded.
    head, sep, _ = text[: budget + 1].rpartition(" ")
    # No space at all means a single very long token; hard-cut it instead.
    clipped = head if sep else text[:budget]
    return clipped.rstrip() + ellipsis


@router.get("/meta/{entity_id:path}", response_class=HTMLResponse, include_in_schema=False)
async def get_meta_page(entity_id: str, request: Request) -> HTMLResponse:
    """
    Return an HTML page with dynamic Open Graph meta tags for the given entity.

    This endpoint is designed to be called by social media crawlers (Slackbot,
    Twitterbot, etc.) to get entity-specific link previews. Regular users
    should be served the SPA directly by Nginx.

    Args:
        entity_id: The entity identifier (e.g., MONDO:0005148)
        request: The FastAPI request object

    Returns:
        HTML page with entity-specific OG meta tags. The description is capped
        at MAX_DESCRIPTION_LENGTH characters, including the trailing ellipsis.

    Raises:
        HTTPException: 404 if entity not found
    """
    entity = solr().get_entity(entity_id, extra=False)
    if entity is None:
        raise HTTPException(status_code=404, detail=f"Entity not found: {entity_id}")

    base_url = get_base_url(request)
    entity_url = f"{base_url}/{entity_id}"

    # Fall back to the identifier when the entity has no display name.
    title = f"{entity.name or entity_id} | Monarch Initiative"

    # "<name> - <description>", using whichever of the two is present.
    description_parts = [part for part in (entity.name, entity.description) if part]
    if description_parts:
        description = " - ".join(description_parts)
    else:
        description = f"View {entity_id} on Monarch Initiative"
    description = _truncate_description(description, MAX_DESCRIPTION_LENGTH)

    # Values are HTML-escaped by the Jinja environment's autoescape setting.
    template = jinja_env.get_template("meta.html")
    html_content = template.render(
        title=title,
        description=description,
        url=entity_url,
        image=get_default_image(request),
    )

    return HTMLResponse(
        content=html_content,
        status_code=200,
        headers={"Cache-Control": "public, max-age=3600"},
    )
backend/src/monarch_py/api/templates/meta.html

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8" />
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
6+
7+
<!-- basic -->
8+
<title>{{ title }}</title>
9+
<meta name="title" content="{{ title }}" />
10+
<meta name="description" content="{{ description }}" />
11+
12+
<!-- open graph -->
13+
<meta property="og:type" content="website" />
14+
<meta property="og:url" content="{{ url }}" />
15+
<meta property="og:title" content="{{ title }}" />
16+
<meta property="og:description" content="{{ description }}" />
17+
<meta property="og:image" content="{{ image }}" />
18+
<meta property="og:site_name" content="Monarch Initiative" />
19+
20+
<!-- twitter -->
21+
<meta property="twitter:card" content="summary_large_image" />
22+
<meta property="twitter:url" content="{{ url }}" />
23+
<meta property="twitter:title" content="{{ title }}" />
24+
<meta property="twitter:description" content="{{ description }}" />
25+
<meta property="twitter:image" content="{{ image }}" />
26+
27+
<!-- redirect browsers to the real page -->
28+
<meta http-equiv="refresh" content="0;url={{ url }}" />
29+
</head>
30+
<body>
31+
<h1>{{ title }}</h1>
32+
<p>{{ description }}</p>
33+
<p>Redirecting to <a href="{{ url }}">{{ url }}</a>...</p>
34+
</body>
35+
</html>

backend/tests/api/test_meta.py

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
"""Tests for the meta endpoint that serves dynamic OG tags to crawlers."""
2+
3+
from unittest.mock import patch
4+
5+
import pytest
6+
from fastapi.testclient import TestClient
7+
8+
from monarch_py.api.main import app
9+
from monarch_py.datamodels.model import Node
10+
11+
12+
@pytest.fixture
def client():
    """Provide a test client bound to the Monarch API application."""
    test_client = TestClient(app)
    return test_client
15+
16+
17+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_html_with_og_tags(mock_get_entity, client, node):
    """A known entity yields an HTML page carrying entity-specific OG tags."""
    mock_get_entity.return_value = Node(**node)

    resp = client.get("/v3/api/meta/MONDO:0020121")

    assert resp.status_code == 200
    assert resp.headers["content-type"] == "text/html; charset=utf-8"

    page = resp.text
    assert "MONDO:0020121" in page
    assert "muscular dystrophy" in page.lower()
    # The OG properties and the canonical entity URL must all be present.
    for fragment in ("og:title", "og:description", "og:url", "testserver/MONDO:0020121"):
        assert fragment in page
33+
34+
35+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_404_for_unknown_entity(mock_get_entity, client):
    """An entity the backend cannot find produces a 404 response."""
    # The backend signals "no such entity" by returning None.
    mock_get_entity.return_value = None

    resp = client.get("/v3/api/meta/FAKE:9999999")

    assert resp.status_code == 404
42+
43+
44+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_escapes_html_in_content(mock_get_entity, client):
    """Markup inside entity fields is HTML-escaped rather than rendered (XSS guard)."""
    hostile_fields = {
        "id": "TEST:001",
        "category": "biolink:Disease",
        "name": '<script>alert("xss")</script>',
        "description": 'A "test" <b>entity</b> with & special chars',
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**hostile_fields)

    resp = client.get("/v3/api/meta/TEST:001")

    assert resp.status_code == 200
    page = resp.text
    # Autoescaping must turn the angle brackets into entities.
    assert "<script>" not in page
    assert "&lt;script&gt;" in page
62+
63+
64+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_truncates_long_description(mock_get_entity, client):
    """An over-long description is shortened and marked with an ellipsis."""
    # 200 five-character repeats: far beyond the 300-character limit.
    full_text = ("word " * 200).strip()
    mock_get_entity.return_value = Node(
        id="TEST:002",
        category="biolink:Disease",
        name="Test Entity",
        description=full_text,
        provided_by="test",
        association_counts=[],
    )

    resp = client.get("/v3/api/meta/TEST:002")

    assert resp.status_code == 200
    page = resp.text
    assert "..." in page
    # The untruncated description must not appear anywhere in the page.
    assert full_text not in page
83+
84+
85+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_name(mock_get_entity, client):
    """A nameless entity falls back to its identifier in the page title."""
    nameless_fields = {
        "id": "TEST:003",
        "category": "biolink:Disease",
        "name": None,
        "description": "A test description",
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**nameless_fields)

    resp = client.get("/v3/api/meta/TEST:003")

    assert resp.status_code == 200
    page = resp.text
    assert "TEST:003 | Monarch Initiative" in page
    assert "A test description" in page
102+
103+
104+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_description(mock_get_entity, client):
    """An entity lacking a description still gets a title and an og:description tag."""
    undescribed_fields = {
        "id": "TEST:004",
        "category": "biolink:Disease",
        "name": "Test Entity",
        "description": None,
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**undescribed_fields)

    resp = client.get("/v3/api/meta/TEST:004")

    assert resp.status_code == 200
    page = resp.text
    assert "Test Entity | Monarch Initiative" in page
    assert 'og:description' in page
121+
122+
123+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_entity_with_no_name_or_description(mock_get_entity, client):
    """With neither name nor description, the page uses ID-based fallback text."""
    bare_fields = {
        "id": "TEST:005",
        "category": "biolink:Disease",
        "name": None,
        "description": None,
        "provided_by": "test",
        "association_counts": [],
    }
    mock_get_entity.return_value = Node(**bare_fields)

    resp = client.get("/v3/api/meta/TEST:005")

    assert resp.status_code == 200
    page = resp.text
    assert "TEST:005 | Monarch Initiative" in page
    assert "View TEST:005 on Monarch Initiative" in page
140+
141+
142+
@patch("monarch_py.implementations.solr.solr_implementation.SolrImplementation.get_entity")
def test_meta_endpoint_returns_cache_control_header(mock_get_entity, client, node):
    """A successful response advertises one hour of public caching."""
    mock_get_entity.return_value = Node(**node)

    resp = client.get("/v3/api/meta/MONDO:0020121")

    assert resp.status_code == 200
    cache_policy = resp.headers["cache-control"]
    assert cache_policy == "public, max-age=3600"

backend/uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)