Skip to content

Commit eab834d

Browse files
authored
add arxiv latex. (#1335)
1 parent 16d99f2 commit eab834d

File tree

8 files changed

+909
-0
lines changed

8 files changed

+909
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.13.2

mcp_servers/arxiv_latex/Dockerfile

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Build context is the repository root; the server sources live in
# mcp_servers/arxiv_latex/.
FROM alpine:latest AS base

WORKDIR /app

ENV LANG=C.UTF-8

# --no-cache fetches the package index on the fly, so a separate `apk update`
# is unnecessary and no index cache is left behind in the image layer.
RUN apk upgrade --no-cache && \
    apk add --no-cache \
    bash \
    curl \
    tini \
    coreutils \
    git

# tini is the init process for the container and forwards signals to the server.
ENTRYPOINT ["/sbin/tini", "-s", "--"]

# Install uv; the installer places the binary in /root/.local/bin.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

ENV PATH="/root/.local/bin:${PATH}"

# .python-version pins the interpreter that `uv venv` provisions.
COPY mcp_servers/arxiv_latex/.python-version .

RUN uv venv

# ---- builder: produce a wheel from the project sources ----
FROM base AS builder

COPY mcp_servers/arxiv_latex/ .

RUN uv sync

RUN uv build

# ---- runner: only the built wheel is installed, sources are not copied ----
FROM base AS runner

COPY --from=builder /app/dist/*.whl /app/

RUN uv pip install /app/*.whl

# Webshare proxy: set these env vars at runtime to route traffic through a proxy.
#   PROXY_USERNAME – required
#   PROXY_PASSWORD – required
#   PROXY_HOST     – default: p.webshare.io
#   PROXY_PORT     – default: 80
#
# The server listens on ARXIV_MCP_SERVER_PORT (default: 5000). EXPOSE is
# documentation only; publish the port you actually configure.
EXPOSE 5000

CMD ["uv", "run", "arxiv-latex-mcp"]

mcp_servers/arxiv_latex/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Takashi Ishida
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"dxt_version": "0.1",
3+
"name": "arxiv-latex-mcp",
4+
"version": "0.1.0",
5+
"description": "MCP server that uses arxiv-to-prompt to fetch and process arXiv LaTeX sources for precise interpretation of mathematical expressions in scientific papers.",
6+
"author": {
7+
"name": "Takashi Ishida",
8+
"url": "https://takashiishida.github.io"
9+
},
10+
"homepage": "https://github.com/takashiishida/arxiv-latex-mcp",
11+
"documentation": "https://github.com/takashiishida/arxiv-latex-mcp",
12+
"server": {
13+
"type": "python",
14+
"entry_point": "server/main.py",
15+
"mcp_config": {
16+
"command": "python3.10",
17+
"args": [
18+
"${__dirname}/server/main.py"
19+
],
20+
"env": {
21+
"PYTHONPATH": "${__dirname}/server/lib"
22+
}
23+
}
24+
},
25+
"license": "MIT",
26+
"repository": {
27+
"type": "git",
28+
"url": "https://github.com/takashiishida/arxiv-latex-mcp"
29+
},
30+
"compatibility": {
31+
"claude_desktop": ">=0.11.4",
32+
"platforms": ["darwin"],
33+
"runtimes": {
34+
"python": ">=3.10"
35+
}
36+
}
37+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
[project]
2+
name = "arxiv-latex-mcp"
3+
version = "0.2.1"
4+
description = "An MCP server that fetches and processes arXiv papers using LaTeX source for accurate equation handling"
5+
readme = "README.md"
6+
requires-python = ">=3.10"
7+
dependencies = [
8+
"httpx>=0.28.1",
9+
"mcp[cli]>=1.6.0",
10+
"arxiv-to-prompt>=0.10.0",
11+
"click>=8.0.0",
12+
"starlette>=0.46.0",
13+
"uvicorn>=0.34.0",
14+
]
15+
16+
[project.scripts]
17+
arxiv-latex-mcp = "server.main:main"
18+
19+
[build-system]
20+
requires = ["hatchling"]
21+
build-backend = "hatchling.build"
22+
23+
[tool.hatch.build.targets.wheel]
24+
packages = ["server"]

mcp_servers/arxiv_latex/server/__init__.py

Whitespace-only changes.
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
#!/usr/bin/env python3
2+
"""
3+
ArXiv LaTeX MCP Server
4+
5+
This server provides tools to fetch and process arXiv papers' LaTeX source code
6+
for better mathematical expression interpretation.
7+
"""
8+
9+
import contextlib
10+
import logging
11+
import os
12+
from collections.abc import AsyncIterator
13+
from typing import Any
14+
15+
import click
16+
import mcp.types as types
17+
import uvicorn
18+
from mcp.server.lowlevel import Server
19+
from mcp.server.sse import SseServerTransport
20+
from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
21+
from starlette.applications import Starlette
22+
from starlette.responses import Response
23+
from starlette.routing import Mount, Route
24+
from starlette.types import Receive, Scope, Send
25+
26+
from arxiv_to_prompt import process_latex_source, list_sections, extract_section
27+
28+
# Module-level logger; handlers and the log level are configured in main().
29+
logger = logging.getLogger(__name__)
30+
31+
# Default HTTP port for the --port option. Read from the ARXIV_MCP_SERVER_PORT
# environment variable (falls back to 5000); int() raises ValueError at import
# time if the variable is set to a non-numeric value.
ARXIV_MCP_SERVER_PORT = int(os.getenv("ARXIV_MCP_SERVER_PORT", "5000"))
32+
33+
34+
def _configure_proxy_from_env() -> None:
    """Export HTTP_PROXY / HTTPS_PROXY built from the PROXY_* env variables.

    Nothing happens unless both PROXY_USERNAME and PROXY_PASSWORD are set.
    PROXY_HOST defaults to ``p.webshare.io`` and PROXY_PORT to ``80``.
    Existing HTTP_PROXY / HTTPS_PROXY values are left untouched (setdefault).
    """
    # Local import keeps this block self-contained (no change to file imports).
    from urllib.parse import quote

    proxy_user = os.environ.get("PROXY_USERNAME")
    proxy_pass = os.environ.get("PROXY_PASSWORD")
    if not (proxy_user and proxy_pass):
        return

    proxy_host = os.environ.get("PROXY_HOST", "p.webshare.io")
    proxy_port = os.environ.get("PROXY_PORT", "80")

    # Credentials must be percent-encoded: a raw '@', ':', '/' or '#' in the
    # username/password would otherwise corrupt the URL's userinfo section.
    user = quote(proxy_user, safe="")
    password = quote(proxy_pass, safe="")
    proxy_url = f"http://{user}:{password}@{proxy_host}:{proxy_port}"

    os.environ.setdefault("HTTP_PROXY", proxy_url)
    os.environ.setdefault("HTTPS_PROXY", proxy_url)
    # Never log the credentials, only the endpoint.
    logger.info(f"Proxy configured: http://{proxy_host}:{proxy_port}")


@click.command()
@click.option(
    "--port", default=ARXIV_MCP_SERVER_PORT, help="Port to listen on for HTTP"
)
@click.option(
    "--log-level",
    default="INFO",
    help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
)
@click.option(
    "--json-response",
    is_flag=True,
    default=False,
    help="Enable JSON responses for StreamableHTTP instead of SSE streams",
)
def main(
    port: int,
    log_level: str,
    json_response: bool,
) -> int:
    """Run the arXiv LaTeX MCP server over SSE and StreamableHTTP.

    Args:
        port: TCP port uvicorn binds to (on all interfaces).
        log_level: Name of a ``logging`` level, case-insensitive.
        json_response: Passed to the StreamableHTTP session manager; selects
            JSON responses instead of SSE streams for that transport.

    Returns:
        0 once uvicorn has stopped serving.

    Raises:
        click.BadParameter: If ``log_level`` is not a known logging level.
    """
    # Local import keeps this block self-contained (no change to file imports).
    import asyncio

    # Reject unknown level names with a usage error instead of a raw
    # AttributeError traceback. Upper-case int attributes of `logging` are
    # exactly the level constants (DEBUG, INFO, WARN, WARNING, ...).
    level = getattr(logging, log_level.upper(), None)
    if not isinstance(level, int):
        raise click.BadParameter(
            f"unknown logging level {log_level!r}", param_hint="--log-level"
        )

    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    # Configure webshare proxy for arxiv_to_prompt (uses requests library).
    # Set PROXY_USERNAME and PROXY_PASSWORD env vars to enable.
    _configure_proxy_from_env()

    # Create the MCP server instance
    app = Server("arxiv-latex-mcp")

    @app.list_tools()
    async def handle_list_tools() -> list[types.Tool]:
        """List available tools."""
        # Every tool takes the same arxiv_id property.
        arxiv_id_property = {
            "type": "string",
            "description": "The arXiv ID of the paper (e.g., '2403.12345')",
        }
        return [
            types.Tool(
                name="get_paper_prompt",
                description="Get a flattened LaTeX code of a paper from arXiv ID for precise interpretation of mathematical expressions",
                inputSchema={
                    "type": "object",
                    "properties": {"arxiv_id": dict(arxiv_id_property)},
                    "required": ["arxiv_id"],
                },
            ),
            types.Tool(
                name="get_paper_abstract",
                description="Get just the abstract of an arXiv paper (faster and cheaper than fetching the full paper)",
                inputSchema={
                    "type": "object",
                    "properties": {"arxiv_id": dict(arxiv_id_property)},
                    "required": ["arxiv_id"],
                },
            ),
            types.Tool(
                name="list_paper_sections",
                description="List section headings of an arXiv paper to see its structure",
                inputSchema={
                    "type": "object",
                    "properties": {"arxiv_id": dict(arxiv_id_property)},
                    "required": ["arxiv_id"],
                },
            ),
            types.Tool(
                name="get_paper_section",
                description="Get a specific section of an arXiv paper by section path (use list_paper_sections first to find available sections)",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "arxiv_id": dict(arxiv_id_property),
                        "section_path": {
                            "type": "string",
                            "description": "The section path to extract (e.g., '1', '2.1', 'Introduction'). Use list_paper_sections to find available paths.",
                        },
                    },
                    "required": ["arxiv_id", "section_path"],
                },
            ),
        ]

    # Appended to LaTeX returned to the client so that equations are rendered
    # with $...$ / $$...$$ delimiters.
    LATEX_RENDER_INSTRUCTIONS = (
        "\n\nIMPORTANT INSTRUCTIONS FOR RENDERING:\n"
        "When discussing this paper, please use dollar sign notation ($...$) for inline equations and double dollar signs ($$...$$) for display equations when providing responses that include LaTeX mathematical expressions.\n"
    )

    # Serialises fetches: the original code ran them inline on the event loop,
    # i.e. one at a time, so no new concurrency is introduced into
    # arxiv_to_prompt by moving the work to a thread.
    fetch_lock = asyncio.Lock()

    async def fetch_latex(arxiv_id: str, **options: Any) -> str:
        """Run the blocking process_latex_source call off the event loop.

        Raises:
            RuntimeError: If no LaTeX text came back.
        """
        async with fetch_lock:
            # process_latex_source does synchronous network I/O; calling it
            # directly would stall every other connection while it downloads.
            text = await asyncio.to_thread(process_latex_source, arxiv_id, **options)
        # NOTE(review): process_latex_source appears to return None when the
        # source cannot be fetched -- confirm against arxiv_to_prompt. The
        # guard turns that into a readable error instead of a TypeError.
        if text is None:
            raise RuntimeError("could not download or process the LaTeX source")
        return text

    @app.call_tool()
    async def handle_call_tool(
        name: str, arguments: dict[str, Any] | None
    ) -> list[types.TextContent]:
        """Handle tool calls.

        Failures while processing a paper are reported back to the client as a
        text message rather than raised; only a missing ``arxiv_id`` raises.
        """
        if not arguments or "arxiv_id" not in arguments:
            raise ValueError("Missing required argument: arxiv_id")

        arxiv_id = arguments["arxiv_id"]

        try:
            if name == "get_paper_prompt":
                logger.info(f"Processing arXiv paper: {arxiv_id}")
                prompt = await fetch_latex(arxiv_id)
                result = prompt + LATEX_RENDER_INSTRUCTIONS
                logger.info(f"Successfully processed arXiv paper: {arxiv_id}")

            elif name == "get_paper_abstract":
                logger.info(f"Getting abstract for arXiv paper: {arxiv_id}")
                result = await fetch_latex(arxiv_id, abstract_only=True)
                logger.info(f"Successfully got abstract for: {arxiv_id}")

            elif name == "list_paper_sections":
                logger.info(f"Listing sections for arXiv paper: {arxiv_id}")
                text = await fetch_latex(arxiv_id)
                sections = list_sections(text)
                result = "\n".join(sections)
                logger.info(f"Successfully listed sections for: {arxiv_id}")

            elif name == "get_paper_section":
                if "section_path" not in arguments:
                    raise ValueError("Missing required argument: section_path")
                section_path = arguments["section_path"]
                logger.info(f"Getting section '{section_path}' for arXiv paper: {arxiv_id}")
                text = await fetch_latex(arxiv_id)
                result = extract_section(text, section_path)
                if result is None:
                    result = f"Section '{section_path}' not found. Use list_paper_sections to see available sections."
                else:
                    result = result + LATEX_RENDER_INSTRUCTIONS
                logger.info(f"Successfully got section for: {arxiv_id}")

            else:
                raise ValueError(f"Unknown tool: {name}")

            return [types.TextContent(type="text", text=result)]

        except Exception as e:
            # Deliberate best-effort boundary: the client gets the message as
            # tool output; the traceback goes to the server log.
            error_msg = f"Error processing arXiv paper {arxiv_id}: {str(e)}"
            logger.exception(error_msg)

            return [types.TextContent(type="text", text=error_msg)]

    # Set up SSE transport
    sse = SseServerTransport("/messages/")

    async def handle_sse(request):
        """Serve one SSE client until its stream closes."""
        logger.info("Handling SSE connection")
        # request._send is a private Starlette attribute; the SSE transport
        # needs the raw ASGI send callable.
        async with sse.connect_sse(
            request.scope, request.receive, request._send
        ) as streams:
            await app.run(
                streams[0], streams[1], app.create_initialization_options()
            )
        return Response()

    # Set up StreamableHTTP transport
    session_manager = StreamableHTTPSessionManager(
        app=app,
        event_store=None,
        json_response=json_response,
        stateless=True,
    )

    async def handle_streamable_http(
        scope: Scope, receive: Receive, send: Send
    ) -> None:
        """ASGI entry point that delegates to the StreamableHTTP session manager."""
        logger.info("Handling StreamableHTTP request")
        await session_manager.handle_request(scope, receive, send)

    @contextlib.asynccontextmanager
    async def lifespan(_starlette_app: Starlette) -> AsyncIterator[None]:
        """Context manager for session manager."""
        async with session_manager.run():
            logger.info("Application started with dual transports!")
            try:
                yield
            finally:
                logger.info("Application shutting down...")

    # Create an ASGI application with routes for both transports
    starlette_app = Starlette(
        # Debug mode serves tracebacks to clients, so it is tied to an explicit
        # DEBUG log level instead of being always on for a 0.0.0.0 listener.
        debug=level <= logging.DEBUG,
        routes=[
            # SSE routes
            Route("/sse", endpoint=handle_sse, methods=["GET"]),
            Mount("/messages/", app=sse.handle_post_message),
            # StreamableHTTP route
            Mount("/mcp", app=handle_streamable_http),
        ],
        lifespan=lifespan,
    )

    logger.info(f"Server starting on port {port} with dual transports:")
    logger.info(f"  - SSE endpoint: http://localhost:{port}/sse")
    logger.info(f"  - StreamableHTTP endpoint: http://localhost:{port}/mcp")

    uvicorn.run(starlette_app, host="0.0.0.0", port=port)

    return 0
257+
258+
259+
# Allow running this module directly as a script; main is a click command, so
# it reads its options from the command line.
if __name__ == "__main__":
260+
main()

0 commit comments

Comments
 (0)