Skip to content

Commit 7e6b549

Browse files
committed
Add linkedin data api tools via RapidAPI
1 parent dfc71b2 commit 7e6b549

File tree

7 files changed

+817
-6
lines changed

7 files changed

+817
-6
lines changed

.env.example

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ AWS_ACCESS_KEY_ID=your_aws_access_key_id
2222
AWS_SECRET_ACCESS_KEY=your_aws_secret_access_key
2323
SENDER_EMAIL=your_sender_email@domain.com
2424

25-
# JINA API Key
25+
# External Services
2626
JINA_API_KEY="YOUR_JINA_API_KEY" # Leave blank if not using deep research
2727
BRAVE_SEARCH_API_KEY=""
28+
RAPIDAPI_KEY=""
2829

2930
# LLM Routing Configuration
3031
# GPT-4o-mini Instance 1

mxtoai/agents/email_agent.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@
4444
from mxtoai.scripts.report_formatter import ReportFormatter
4545
from mxtoai.scripts.visual_qa import azure_visualizer
4646
from mxtoai.tools.attachment_processing_tool import AttachmentProcessingTool
47-
from mxtoai.tools.deep_research_tool import DeepResearchTool
48-
from mxtoai.tools.schedule_tool import ScheduleTool
4947

50-
# Import the refactored fallback search tool
51-
from mxtoai.tools.search_with_fallback_tool import SearchWithFallbackTool
5248
# Import the new Brave Search tool
5349
from mxtoai.tools.brave_search_tool import initialize_brave_search_tool
50+
from mxtoai.tools.deep_research_tool import DeepResearchTool
51+
from mxtoai.tools.external_data.linkedin import initialize_linkedin_data_api_tool, initialize_linkedin_fresh_tool
52+
from mxtoai.tools.schedule_tool import ScheduleTool
53+
from mxtoai.tools.search_with_fallback_tool import SearchWithFallbackTool
5454

5555
# Load environment variables
5656
load_dotenv(override=True)
@@ -122,6 +122,14 @@ def __init__(
122122
if self.research_tool:
123123
self.available_tools.append(self.research_tool)
124124

125+
linkedin_fresh_tool = initialize_linkedin_fresh_tool()
126+
if linkedin_fresh_tool:
127+
self.available_tools.append(linkedin_fresh_tool)
128+
129+
linkedin_data_api_tool = initialize_linkedin_data_api_tool()
130+
if linkedin_data_api_tool:
131+
self.available_tools.append(linkedin_data_api_tool)
132+
125133
logger.info(f"Agent tools initialized: {[tool.name for tool in self.available_tools]}")
126134
self._init_agent()
127135
logger.info("Email agent initialized successfully")

mxtoai/prompts/template_prompts.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,13 @@
9797
# Background research handler template
9898
BACKGROUND_RESEARCH_TEMPLATE = """
9999
Research identities mentioned in email including names, email addresses, and domains. Focus on finding background information about the sender and other parties mentioned.
100-
Do not use deep search directly, use web search and page visit tool, if you're not satisfied with results, then only try deep search.
100+
Use web search, page visit and linkedin data tools.
101+
During your search, try to find best relevant profiles from websearch or linkedin results by looking at email content. Don't pick any random matching profile and mark research done.
102+
103+
**LinkedIn Data Strategy:**
104+
- For LinkedIn profile/company research, try linkedin_data tool first (uses LinkedIn URLs directly)
105+
- If linkedin_data fails or you need to search by criteria, use linkedin_data_api tool
106+
- Use web search first to find LinkedIn URLs or usernames for more accurate LinkedIn data requests
101107
102108
Response Requirements:
103109
1. Structure with clear sections:
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
"""
2+
External Data Module for MXtoAI.
3+
4+
This module provides integration with various external data sources to enhance
5+
the capabilities of the MXtoAI email agent.
6+
"""
7+
8+
from .linkedin import (
9+
LinkedInDataAPITool,
10+
LinkedInFreshDataTool,
11+
initialize_linkedin_data_api_tool,
12+
initialize_linkedin_fresh_tool,
13+
)
14+
15+
__all__ = [
16+
"LinkedInDataAPITool",
17+
"LinkedInFreshDataTool",
18+
"initialize_linkedin_data_api_tool",
19+
"initialize_linkedin_fresh_tool",
20+
]
21+
22+
# Version of the external data module
23+
__version__ = '0.2.0'
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
LinkedIn data integration module for MXtoAI.
3+
Provides tools for accessing LinkedIn data through various APIs.
4+
"""
5+
6+
from .fresh_data import LinkedInFreshDataTool, initialize_linkedin_fresh_tool
7+
from .linkedin_data_api import LinkedInDataAPITool, initialize_linkedin_data_api_tool
8+
9+
__all__ = [
10+
"LinkedInDataAPITool",
11+
"LinkedInFreshDataTool",
12+
"initialize_linkedin_data_api_tool",
13+
"initialize_linkedin_fresh_tool",
14+
]
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
"""
2+
LinkedIn Fresh Data API implementation.
3+
Provides access to LinkedIn data through the Fresh LinkedIn Profile Data API.
4+
"""
5+
6+
import logging
7+
import os
8+
from typing import Optional
9+
10+
import requests
11+
from smolagents import Tool
12+
13+
logger = logging.getLogger(__name__)
14+
15+
class LinkedInFreshDataTool(Tool):
    """
    Tool for accessing LinkedIn data through Fresh LinkedIn Profile Data API.

    Two actions are supported, selected through the ``action`` input:

    * ``get_linkedin_profile`` - fetch a profile from a LinkedIn profile URL,
      optionally including extra sections via the ``include_*`` flags.
    * ``get_company_by_linkedin_url`` - fetch a company from a LinkedIn
      company URL.

    All requests are authenticated with the RapidAPI key given at
    construction time.
    """

    name: str = "linkedin_fresh_data"
    description: str = "Access LinkedIn profile and company data directly from LinkedIn URLs for research and verification."
    output_type: str = "object"
    inputs: dict = {  # noqa: RUF012
        "action": {
            "type": "string",
            "description": "The action to perform: 'get_linkedin_profile' or 'get_company_by_linkedin_url'",
            "enum": ["get_linkedin_profile", "get_company_by_linkedin_url"]
        },
        "linkedin_url": {
            "type": "string",
            "description": "The LinkedIn URL (profile or company)"
        },
        # Optional parameters for get_linkedin_profile action
        "include_skills": {
            "type": "boolean",
            "description": "Include skills section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_certifications": {
            "type": "boolean",
            "description": "Include certifications section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_publications": {
            "type": "boolean",
            "description": "Include publications section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_honors": {
            "type": "boolean",
            "description": "Include honors and awards section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_volunteers": {
            "type": "boolean",
            "description": "Include volunteer experience section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_projects": {
            "type": "boolean",
            "description": "Include projects section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_patents": {
            "type": "boolean",
            "description": "Include patents section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_courses": {
            "type": "boolean",
            "description": "Include courses section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_organizations": {
            "type": "boolean",
            "description": "Include organizations section in response (default: false)",
            "default": False,
            "nullable": True
        },
        "include_profile_status": {
            "type": "boolean",
            "description": "Include profile status information (default: false)",
            "default": False,
            "nullable": True
        },
        "include_company_public_url": {
            "type": "boolean",
            "description": "Include company public URL information (default: false)",
            "default": False,
            "nullable": True
        }
    }

    def __init__(self, api_key: str, timeout: float = 30.0):
        """
        Initialize the LinkedIn Fresh Data tool.

        Args:
            api_key: The RapidAPI key for authentication.
            timeout: Seconds to wait for the API before giving up on a
                request. Without a timeout ``requests`` waits indefinitely.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        super().__init__()
        if not api_key:
            raise ValueError("RapidAPI key is required for LinkedIn Fresh Data API.")
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://fresh-linkedin-profile-data.p.rapidapi.com"
        self.headers = {
            "x-rapidapi-key": self.api_key,
            "x-rapidapi-host": "fresh-linkedin-profile-data.p.rapidapi.com"
        }

    @staticmethod
    def _flag(value) -> str:
        """Render an optional boolean as the 'true'/'false' query value."""
        # The include_* inputs are declared nullable, so None can arrive here;
        # str(None).lower() would put the literal "none" into the query string.
        return "true" if value else "false"

    def _get(self, endpoint: str, params: dict) -> dict:
        """Send an authenticated GET to ``endpoint`` and return the decoded JSON body."""
        response = requests.get(
            f"{self.base_url}{endpoint}",
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def forward(self,
                action: str,
                linkedin_url: str,
                include_skills: bool = False,
                include_certifications: bool = False,
                include_publications: bool = False,
                include_honors: bool = False,
                include_volunteers: bool = False,
                include_projects: bool = False,
                include_patents: bool = False,
                include_courses: bool = False,
                include_organizations: bool = False,
                include_profile_status: bool = False,
                include_company_public_url: bool = False) -> dict:
        """
        Process LinkedIn data requests.

        Args:
            action: The type of request to perform ('get_linkedin_profile' or 'get_company_by_linkedin_url')
            linkedin_url: The LinkedIn URL (profile or company)
            include_skills: Include skills section in response (default: false)
            include_certifications: Include certifications section in response (default: false)
            include_publications: Include publications section in response (default: false)
            include_honors: Include honors and awards section in response (default: false)
            include_volunteers: Include volunteer experience section in response (default: false)
            include_projects: Include projects section in response (default: false)
            include_patents: Include patents section in response (default: false)
            include_courses: Include courses section in response (default: false)
            include_organizations: Include organizations section in response (default: false)
            include_profile_status: Include profile status information (default: false)
            include_company_public_url: Include company public URL information (default: false)

        Returns:
            Dict containing the results

        Raises:
            ValueError: If ``action`` is not one of the supported actions.
            Exception: If the request fails or the response cannot be
                processed; the original error is chained as the cause.
        """
        supported_actions = ("get_linkedin_profile", "get_company_by_linkedin_url")

        # Reject unknown actions before entering the try block so the
        # ValueError reaches the caller unwrapped.
        if action not in supported_actions:
            raise ValueError(f"Unsupported action: {action}")

        try:
            if action == "get_linkedin_profile":
                return self.get_linkedin_profile(
                    linkedin_url=linkedin_url,
                    include_skills=include_skills,
                    include_certifications=include_certifications,
                    include_publications=include_publications,
                    include_honors=include_honors,
                    include_volunteers=include_volunteers,
                    include_projects=include_projects,
                    include_patents=include_patents,
                    include_courses=include_courses,
                    include_organizations=include_organizations,
                    include_profile_status=include_profile_status,
                    include_company_public_url=include_company_public_url
                )
            # The include_* flags only apply to profile lookups.
            return self.get_company_by_linkedin_url(linkedin_url=linkedin_url)
        except requests.exceptions.RequestException as e:
            logger.error(f"LinkedIn Fresh Data API request failed: {e}")
            raise Exception(f"LinkedIn Fresh Data API request failed: {e}") from e
        except Exception as e:
            logger.error(f"Error processing LinkedIn Fresh Data API request: {e}")
            raise Exception(f"Failed to process LinkedIn Fresh Data API request: {e}") from e

    def get_linkedin_profile(self,
                             linkedin_url: str,
                             include_skills: bool = False,
                             include_certifications: bool = False,
                             include_publications: bool = False,
                             include_honors: bool = False,
                             include_volunteers: bool = False,
                             include_projects: bool = False,
                             include_patents: bool = False,
                             include_courses: bool = False,
                             include_organizations: bool = False,
                             include_profile_status: bool = False,
                             include_company_public_url: bool = False) -> dict:
        """
        Get detailed LinkedIn profile information from a LinkedIn profile URL.

        Args:
            linkedin_url: LinkedIn profile URL (e.g., "https://www.linkedin.com/in/username/")
            include_skills: Include skills section in response
            include_certifications: Include certifications section in response
            include_publications: Include publications section in response
            include_honors: Include honors and awards section in response
            include_volunteers: Include volunteer experience section in response
            include_projects: Include projects section in response
            include_patents: Include patents section in response
            include_courses: Include courses section in response
            include_organizations: Include organizations section in response
            include_profile_status: Include profile status information
            include_company_public_url: Include company public URL information

        Returns:
            Dict containing detailed profile information

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-success HTTP status.
        """
        params = {
            "linkedin_url": linkedin_url,
            "include_skills": self._flag(include_skills),
            "include_certifications": self._flag(include_certifications),
            "include_publications": self._flag(include_publications),
            "include_honors": self._flag(include_honors),
            "include_volunteers": self._flag(include_volunteers),
            "include_projects": self._flag(include_projects),
            "include_patents": self._flag(include_patents),
            "include_courses": self._flag(include_courses),
            "include_organizations": self._flag(include_organizations),
            "include_profile_status": self._flag(include_profile_status),
            "include_company_public_url": self._flag(include_company_public_url)
        }
        return self._get("/get-linkedin-profile", params)

    def get_company_by_linkedin_url(self, linkedin_url: str) -> dict:
        """
        Get company information from a LinkedIn company URL.

        Args:
            linkedin_url: LinkedIn company URL (e.g., "https://www.linkedin.com/company/apple/")

        Returns:
            Dict containing company information

        Raises:
            requests.exceptions.RequestException: On connection errors,
                timeouts, or a non-success HTTP status.
        """
        return self._get("/get-company-by-linkedinurl", {"linkedin_url": linkedin_url})
264+
265+
266+
def initialize_linkedin_fresh_tool() -> Optional[LinkedInFreshDataTool]:
    """
    Build a LinkedInFreshDataTool from the RAPIDAPI_KEY environment variable.

    Returns:
        Optional[LinkedInFreshDataTool]: The constructed tool, or None when
        RAPIDAPI_KEY is unset/empty or the tool rejects the key with a
        ValueError. Both failure paths log a warning instead of raising.
    """
    rapidapi_key = os.getenv("RAPIDAPI_KEY")

    # Guard clause: nothing to build without a key.
    if not rapidapi_key:
        logger.warning(
            "LinkedInFreshDataTool not initialized. Missing RAPIDAPI_KEY environment variable."
        )
        return None

    try:
        fresh_tool = LinkedInFreshDataTool(api_key=rapidapi_key)
    except ValueError as err:
        logger.warning(f"Failed to initialize LinkedInFreshDataTool: {err}")
        return None

    logger.debug("Initialized LinkedInFreshDataTool.")
    return fresh_tool

0 commit comments

Comments
 (0)