
Commit 645a302

kgritesh and claude committed
test(mcp): add e2e tests for tool registration, validation, and integration
Phase 1 (no credentials): tool registration schemas, parameter validation,
uninitialized scraper error handling (29 tests). Phase 2 (integration): full
e2e through MCP protocol for all 11 tools (20 tests). Adds pytest-asyncio
auto mode and shared fixtures.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 116db01 commit 645a302

File tree

4 files changed, +768 −0 lines changed


pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -79,6 +79,7 @@ linkedin_spider = ["py.typed"]
 "*" = ["*.md", "*.txt", "*.yml", "*.yaml"]
 
 [tool.pytest.ini_options]
+asyncio_mode = "auto"
 testpaths = ["tests"]
 python_files = ["test_*.py", "*_test.py"]
 addopts = [
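
This one-line addition matters for the new suites: with asyncio_mode set to
"auto", pytest-asyncio collects bare async def tests and async fixtures
without per-function markers, which is what lets conftest.py below define
mcp_client as a plain async fixture. A minimal illustration of the effect
(not part of this commit):

import asyncio


async def test_coroutines_run_without_marker() -> None:
    # Under asyncio_mode = "auto" this coroutine is collected and awaited
    # as-is; under the default "strict" mode it would need an explicit
    # @pytest.mark.asyncio marker to run.
    await asyncio.sleep(0)
    assert asyncio.get_running_loop() is not None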

tests/conftest.py

Lines changed: 43 additions & 0 deletions
from __future__ import annotations

import os
from collections.abc import AsyncGenerator, Generator

import pytest
from dotenv import load_dotenv
from fastmcp import Client

import linkedin_spider.mcp.server as mcp_server
from linkedin_spider.core.config import ScraperConfig
from linkedin_spider.core.scraper import LinkedinSpider
from linkedin_spider.mcp.server import mcp_app

load_dotenv()


@pytest.fixture(scope="session")
def spider() -> Generator[LinkedinSpider, None, None]:
    """Session-scoped LinkedinSpider authenticated via LINKEDIN_COOKIE from .env."""
    cookie = os.environ.get("LINKEDIN_COOKIE")
    if not cookie:
        pytest.skip("LINKEDIN_COOKIE not set in environment")

    config = ScraperConfig(headless=True)
    scraper = LinkedinSpider(li_at_cookie=cookie, config=config)
    yield scraper
    scraper.close()


@pytest.fixture(scope="session")
def mcp_scraper(spider: LinkedinSpider) -> Generator[LinkedinSpider, None, None]:
    """Inject the session-scoped spider into the MCP server global, reset on teardown."""
    mcp_server._scraper_instance = spider
    yield spider
    mcp_server._scraper_instance = None


@pytest.fixture
async def mcp_client() -> AsyncGenerator[Client, None]:
    """Provide a connected FastMCP Client for in-memory MCP testing."""
    async with Client(mcp_app) as client:
        yield client
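
Taken together, these fixtures let a test drive the server end to end in
memory: mcp_scraper injects the authenticated spider into the server global,
and mcp_client speaks the MCP protocol to mcp_app without a subprocess or
socket. A minimal sketch of how they compose (illustrative only; the tool
name "scrape_profile" and its "profile_url" argument are assumptions, not
taken from this diff):

from fastmcp import Client

from linkedin_spider.core.scraper import LinkedinSpider


async def test_profile_tool_over_mcp(
    mcp_scraper: LinkedinSpider, mcp_client: Client
) -> None:
    # mcp_scraper has populated the server global, so a tool call can reach
    # the real spider through the MCP layer.
    tools = await mcp_client.list_tools()
    assert any(tool.name == "scrape_profile" for tool in tools)

    result = await mcp_client.call_tool(
        "scrape_profile",
        {"profile_url": "https://www.linkedin.com/in/williamhgates/"},
    )
    assert result.data  # structured payload (CallToolResult on FastMCP 2.x)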

tests/test_e2e.py

Lines changed: 238 additions & 0 deletions
"""End-to-end tests for linkedin-spider scraper actions.

One scrape call per test. Non-deterministic outputs are validated by
structure, types, and minimum result counts — not exact values.
Parametrized where multiple inputs exercise different code paths.
"""

from __future__ import annotations

from typing import Any

import pytest

from linkedin_spider.core.scraper import LinkedinSpider

PROFILE_URLS = [
    "https://www.linkedin.com/in/williamhgates/",
    "https://www.linkedin.com/in/satyanadella/",
]
COMPANY_URLS = [
    "https://www.linkedin.com/company/microsoft/",
    "https://www.linkedin.com/company/google/",
]
PROFILE_EXPECTED_KEYS = {
    "name",
    "headline",
    "location",
    "about",
    "experience",
    "education",
    "profile_url",
}
COMPANY_EXPECTED_KEYS = {
    "name",
    "company_url",
    "tagline",
    "industry",
    "location",
    "followers",
    "employee_count",
}


def _assert_non_empty_str(value: Any, field: str) -> None:
    assert isinstance(value, str), f"{field} should be str, got {type(value)}"
    assert value and value != "N/A", f"{field} should not be empty/N/A"


# ── profile ────────────────────────────────────────────────────────────────


@pytest.mark.integration
@pytest.mark.parametrize("profile_url", PROFILE_URLS)
def test_scrape_profile(spider: LinkedinSpider, profile_url: str) -> None:
    result = spider.scrape_profile(profile_url)

    assert result is not None, "scrape_profile returned None"
    assert PROFILE_EXPECTED_KEYS.issubset(result.keys()), (
        f"Missing keys: {PROFILE_EXPECTED_KEYS - result.keys()}"
    )
    _assert_non_empty_str(result["name"], "name")
    assert isinstance(result["experience"], list)
    assert isinstance(result["education"], list)
    assert result["profile_url"] == profile_url


@pytest.mark.integration
def test_scrape_profile_invalid_url(spider: LinkedinSpider) -> None:
    assert spider.scrape_profile("https://example.com/not-a-profile") is None


# ── company ────────────────────────────────────────────────────────────────


@pytest.mark.integration
@pytest.mark.parametrize("company_url", COMPANY_URLS)
def test_scrape_company(spider: LinkedinSpider, company_url: str) -> None:
    result = spider.scrape_company(company_url)

    assert result is not None, "scrape_company returned None"
    assert COMPANY_EXPECTED_KEYS.issubset(result.keys()), (
        f"Missing keys: {COMPANY_EXPECTED_KEYS - result.keys()}"
    )
    _assert_non_empty_str(result["name"], "name")
    assert result["company_url"] == company_url


@pytest.mark.integration
def test_scrape_company_invalid_url(spider: LinkedinSpider) -> None:
    assert spider.scrape_company("https://example.com/not-a-company") is None


# ── search profiles ───────────────────────────────────────────────────────


@pytest.mark.integration
@pytest.mark.parametrize(
    ("query", "max_results", "filters"),
    [
        ("software engineer", 3, None),
        ("data scientist", 2, {"location": "San Francisco"}),
        ("product manager", 2, {"industry": "Technology, Information and Internet"}),
    ],
    ids=["no-filter", "location-filter", "industry-filter"],
)
def test_search_profiles(
    spider: LinkedinSpider,
    query: str,
    max_results: int,
    filters: dict[str, str] | None,
) -> None:
    results = spider.search_profiles(query, max_results=max_results, filters=filters)

    assert isinstance(results, list)
    assert len(results) >= 1, f"Expected at least 1 result for '{query}'"
    assert len(results) <= max_results

    first = results[0]
    expected_keys = {"name", "headline", "location", "profile_url"}
    assert expected_keys.issubset(first.keys()), f"Missing keys: {expected_keys - first.keys()}"


# ── search posts ──────────────────────────────────────────────────────────


@pytest.mark.integration
@pytest.mark.parametrize(
    ("keywords", "date_posted"),
    [
        ("artificial intelligence", None),
        ("startup funding", "past-week"),
    ],
    ids=["no-date-filter", "past-week"],
)
def test_search_posts(
    spider: LinkedinSpider,
    keywords: str,
    date_posted: str | None,
) -> None:
    results = spider.search_posts(
        keywords,
        max_results=2,
        max_comments=0,
        date_posted=date_posted,
    )

    assert isinstance(results, list)
    assert len(results) >= 1, f"Expected at least 1 post for '{keywords}'"

    post = results[0]
    expected_keys = {
        "author_name",
        "author_headline",
        "author_profile_url",
        "post_text",
        "hashtags",
        "links",
        "post_url",
        "likes_count",
        "comments_count",
        "reposts_count",
    }
    assert expected_keys.issubset(post.keys()), f"Missing keys: {expected_keys - post.keys()}"
    assert isinstance(post["likes_count"], int)
    assert isinstance(post["comments_count"], int)
    assert isinstance(post["reposts_count"], int)
    assert isinstance(post["hashtags"], list)
    assert isinstance(post["links"], list)


# ── conversations list ────────────────────────────────────────────────────


@pytest.mark.integration
def test_scrape_conversations_list(spider: LinkedinSpider) -> None:
    results = spider.scrape_conversations_list(max_results=3)

    assert isinstance(results, list)
    if not results:
        pytest.skip("No conversations available")

    convo = results[0]
    expected_keys = {"participant_name", "timestamp", "message_snippet"}
    assert expected_keys.issubset(convo.keys()), f"Missing keys: {expected_keys - convo.keys()}"


# ── conversation messages ─────────────────────────────────────────────────


@pytest.mark.integration
def test_scrape_conversation_messages(spider: LinkedinSpider) -> None:
    result = spider.scrape_conversation_messages()

    assert result is not None
    assert "messages" in result
    assert "total_messages" in result
    assert isinstance(result["messages"], list)
    assert isinstance(result["total_messages"], int)


# ── incoming connections ──────────────────────────────────────────────────


@pytest.mark.integration
def test_scrape_incoming_connections(spider: LinkedinSpider) -> None:
    results = spider.scrape_incoming_connections(max_results=3)

    assert isinstance(results, list)
    if not results:
        pytest.skip("No incoming connections available")

    conn = results[0]
    expected_keys = {"name", "profile_url", "headline"}
    assert expected_keys.issubset(conn.keys()), f"Missing keys: {expected_keys - conn.keys()}"


# ── outgoing connections ──────────────────────────────────────────────────


@pytest.mark.integration
def test_scrape_outgoing_connections(spider: LinkedinSpider) -> None:
    results = spider.scrape_outgoing_connections(max_results=3)

    assert isinstance(results, list)
    if not results:
        pytest.skip("No outgoing connections available")

    conn = results[0]
    expected_keys = {"name", "profile_url", "headline"}
    assert expected_keys.issubset(conn.keys()), f"Missing keys: {expected_keys - conn.keys()}"


# ── keep alive ────────────────────────────────────────────────────────────


@pytest.mark.integration
def test_keep_alive(spider: LinkedinSpider) -> None:
    assert spider.keep_alive() is True
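
The commit message also describes a Phase 1 suite (tool registration schemas,
parameter validation, uninitialized scraper error handling) that runs without
credentials; that file is not shown in this excerpt. A minimal sketch of the
uninitialized-scraper check it describes, assuming a tool named
"scrape_profile" and assuming the server surfaces the failure as an MCP tool
error (FastMCP's client raises ToolError for error results):

import pytest
from fastmcp import Client
from fastmcp.exceptions import ToolError

from linkedin_spider.mcp.server import mcp_app


async def test_tool_call_without_scraper_errors() -> None:
    # No spider has been injected into the server global here, so the tool
    # should report an error through MCP instead of attempting to scrape.
    async with Client(mcp_app) as client:
        with pytest.raises(ToolError):
            await client.call_tool(
                "scrape_profile",
                {"profile_url": "https://www.linkedin.com/in/williamhgates/"},
            )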
