Skip to content

Commit 5fa069f

Browse files
kgritesh and claude committed
fix(mcp): prevent stdout pollution from corrupting stdio JSON-RPC protocol
Selenium/ChromeDriver writes stray output to stdout during initialization and scraping, which breaks the MCP stdio transport. Redirect stdout to stderr during scraper init and all tool handler calls when in stdio mode. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f847751 commit 5fa069f

File tree

1 file changed

+58
-21
lines changed

1 file changed

+58
-21
lines changed

src/linkedin_spider/mcp/server.py

Lines changed: 58 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import contextlib
12
import json
23
import logging
34
import os
45
import sys
6+
from collections.abc import Generator
57
from typing import Annotated
68

79
from cyclopts import App, Parameter
@@ -25,6 +27,23 @@
2527

2628
_scraper_instance = None
2729

30+
# Whether we're running in stdio mode (needs stdout protection)
31+
_stdio_mode = False
32+
33+
34+
@contextlib.contextmanager
def _suppress_stdout() -> Generator[None, None, None]:
    """Context manager that reroutes stdout to stderr while in stdio mode.

    The MCP stdio transport owns stdout for JSON-RPC framing, so any stray
    writes (e.g. from Selenium/ChromeDriver) would corrupt the protocol.
    Outside stdio mode this is a no-op passthrough.
    """
    if _stdio_mode:
        # Swap stdout for stderr for the duration of the block, restoring
        # the original stream even if the body raises.
        saved_stream = sys.stdout
        sys.stdout = sys.stderr
        try:
            yield
        finally:
            sys.stdout = saved_stream
    else:
        yield
46+
2847

2948
def get_scraper():
3049
global _scraper_instance
@@ -40,7 +59,8 @@ async def scrape_profile(profile_url: str) -> str:
4059

4160
try:
4261
scraper = get_scraper()
43-
result = scraper.scrape_profile(profile_url)
62+
with _suppress_stdout():
63+
result = scraper.scrape_profile(profile_url)
4464

4565
if result:
4666
return json.dumps(result, indent=2, ensure_ascii=False)
@@ -82,7 +102,10 @@ async def search_profiles(
82102
if followers_of:
83103
filters["followers_of"] = followers_of
84104

85-
results = scraper.scrape_search_results(query, max_results, filters if filters else None)
105+
with _suppress_stdout():
106+
results = scraper.scrape_search_results(
107+
query, max_results, filters if filters else None
108+
)
86109

87110
if results:
88111
return f"profiles:\n{json.dumps(results, indent=2, ensure_ascii=False)}"
@@ -97,7 +120,8 @@ async def search_profiles(
97120
async def get_session_status() -> str:
98121
try:
99122
scraper = get_scraper()
100-
is_active = scraper.keep_alive()
123+
with _suppress_stdout():
124+
is_active = scraper.keep_alive()
101125
status = "Active" if is_active else "Inactive"
102126
except Exception as e:
103127
return f"Error checking session status: {e!s}"
@@ -110,7 +134,8 @@ async def reset_session() -> str:
110134
global _scraper_instance
111135
try:
112136
if _scraper_instance:
113-
_scraper_instance.close()
137+
with _suppress_stdout():
138+
_scraper_instance.close()
114139
_scraper_instance = None
115140
except Exception as e:
116141
return f"Error resetting session: {e!s}"
@@ -122,7 +147,8 @@ async def reset_session() -> str:
122147
async def scrape_incoming_connections(max_results: int = 10) -> str:
123148
try:
124149
scraper = get_scraper()
125-
results = scraper.scrape_incoming_connections(max_results)
150+
with _suppress_stdout():
151+
results = scraper.scrape_incoming_connections(max_results)
126152

127153
if results:
128154
return f"incoming_connections:\n{json.dumps(results, indent=2, ensure_ascii=False)}"
@@ -137,7 +163,8 @@ async def scrape_incoming_connections(max_results: int = 10) -> str:
137163
async def scrape_outgoing_connections(max_results: int = 10) -> str:
138164
try:
139165
scraper = get_scraper()
140-
results = scraper.scrape_outgoing_connections(max_results)
166+
with _suppress_stdout():
167+
results = scraper.scrape_outgoing_connections(max_results)
141168

142169
if results:
143170
return f"outgoing_connections:\n{json.dumps(results, indent=2, ensure_ascii=False)}"
@@ -155,7 +182,8 @@ async def scrape_company(company_url: str) -> str:
155182

156183
try:
157184
scraper = get_scraper()
158-
result = scraper.scrape_company(company_url)
185+
with _suppress_stdout():
186+
result = scraper.scrape_company(company_url)
159187

160188
if result:
161189
return f"company_profile:\n{json.dumps(result, indent=2, ensure_ascii=False)}"
@@ -191,9 +219,10 @@ async def search_posts(
191219

192220
try:
193221
scraper = get_scraper()
194-
results = scraper.search_posts(
195-
keywords, max_results, scroll_pause, max_comments, date_posted
196-
)
222+
with _suppress_stdout():
223+
results = scraper.search_posts(
224+
keywords, max_results, scroll_pause, max_comments, date_posted
225+
)
197226

198227
if results:
199228
return f"posts:\n{json.dumps(results, indent=2, ensure_ascii=False)}"
@@ -208,7 +237,8 @@ async def search_posts(
208237
async def scrape_conversations_list(max_results: int = 10) -> str:
209238
try:
210239
scraper = get_scraper()
211-
conversations = scraper.scrape_conversations_list(max_results)
240+
with _suppress_stdout():
241+
conversations = scraper.scrape_conversations_list(max_results)
212242

213243
if conversations:
214244
return f"conversations_list:\n{json.dumps(conversations, indent=2, ensure_ascii=False)}"
@@ -223,7 +253,8 @@ async def scrape_conversations_list(max_results: int = 10) -> str:
223253
async def scrape_conversation(participant_name: str | None = None) -> str:
224254
try:
225255
scraper = get_scraper()
226-
conversation_data = scraper.scrape_conversation_messages(participant_name)
256+
with _suppress_stdout():
257+
conversation_data = scraper.scrape_conversation_messages(participant_name)
227258

228259
if conversation_data and conversation_data.get("messages"):
229260
return f"conversation:\n{json.dumps(conversation_data, indent=2, ensure_ascii=False)}"
@@ -242,7 +273,8 @@ async def send_connection_request(profile_url: str, note: str | None = None) ->
242273

243274
try:
244275
scraper = get_scraper()
245-
success = scraper.send_connection_request(profile_url, note)
276+
with _suppress_stdout():
277+
success = scraper.send_connection_request(profile_url, note)
246278

247279
result = {
248280
"profile_url": profile_url,
@@ -308,16 +340,21 @@ def serve(
308340
] = None,
309341
):
310342
"""Start the LinkedIn MCP server."""
343+
global _stdio_mode
311344
logger.info(f"Starting LinkedIn MCP {transport.upper()} Server...")
312345

313-
try:
314-
logger.info("Initializing LinkedIn scraper...")
315-
_initialize_scraper(email, password, cookie, headless, user_agent, proxy)
316-
logger.info("LinkedIn scraper initialized successfully")
317-
except Exception:
318-
logger.exception("Failed to initialize scraper")
319-
logger.exception("Cannot start server without valid LinkedIn credentials")
320-
sys.exit(1)
346+
if transport == "stdio":
347+
_stdio_mode = True
348+
349+
with _suppress_stdout():
350+
try:
351+
logger.info("Initializing LinkedIn scraper...")
352+
_initialize_scraper(email, password, cookie, headless, user_agent, proxy)
353+
logger.info("LinkedIn scraper initialized successfully")
354+
except Exception:
355+
logger.exception("Failed to initialize scraper")
356+
logger.exception("Cannot start server without valid LinkedIn credentials")
357+
sys.exit(1)
321358

322359
logger.info(
323360
f"FastMCP {transport.upper()} Server initialized with tools: scrape_profile, search_profiles, scrape_company, "

0 commit comments

Comments
 (0)