Skip to content

Commit c9e9d0f

Browse files
committed
feature: improve
1 parent d3ee828 commit c9e9d0f

31 files changed

+3324
-0
lines changed

.github/scripts/linkedin_sync.py

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Enhanced LinkedIn Profile Scraper for GitHub Pages
4+
5+
This script fetches data from LinkedIn and updates data files for a GitHub Pages site.
6+
"""
7+
8+
import os
9+
import re
10+
import yaml
11+
import json
12+
import time
13+
import glob
14+
import logging
15+
from pathlib import Path
16+
from datetime import datetime
17+
from typing import Dict, List, Optional, Any
18+
19+
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger shared by every function in this script.
logger = logging.getLogger('linkedin_sync')

# Configuration
LINKEDIN_URL = "https://www.linkedin.com/in/notawar/"  # canonical profile URL
DATA_DIR = "_data"  # Jekyll data directory all generated YAML goes into
CV_FILE_PATH = f"{DATA_DIR}/cv.yml"  # structured CV data
ABOUT_FILE_PATH = f"{DATA_DIR}/about.yml"  # About-page data
LAST_UPDATED_FILE = f"{DATA_DIR}/linkedin_last_updated.txt"  # sync timestamp marker
LINKEDIN_BACKUP_PATH = f"{DATA_DIR}/linkedin_raw_data.json"  # raw fetch backup for debugging
CATEGORIES_FILE_PATH = f"{DATA_DIR}/categories.yml"  # company/skill categories
POSTS_DIR = "_posts"  # NOTE(review): not referenced in this script — confirm external use
LINKEDIN_EXTENDED_DIR = "_linkedin_extended"  # NOTE(review): not referenced in this script
SITE_URL = "https://NotAwar.github.io"  # published GitHub Pages site
38+
def fetch_linkedin_data() -> Dict[str, Any]:
    """Return profile data for the configured LinkedIn account.

    Stand-in implementation: instead of calling the LinkedIn API or
    scraping the profile page, it returns a hardcoded sample payload so
    the rest of the sync pipeline can be exercised.

    Returns:
        A dict with the keys ``basic_info``, ``experience``, ``education``,
        ``skills`` and ``certifications``.
    """
    logger.info("Fetching LinkedIn data from %s", LINKEDIN_URL)

    # Replace this hardcoded payload with a real API call or scrape once
    # credentials/automation are available.
    basic_info = {
        "name": "Awar",
        "headline": "Senior Engineer",
        "location": "Your City, Country",
        "summary": "Experienced Senior Engineer specializing in cloud architecture and distributed systems.",
    }

    experience = [
        {
            "title": "Senior Engineer",
            "company": "Current Company",
            "date_range": "January 2021 - Present",
            "description": "Leading cloud-native platform development and optimization.",
        },
        {
            "title": "Software Engineer",
            "company": "Previous Company",
            "date_range": "June 2018 - December 2020",
            "description": "Developed and maintained high-performance APIs and services.",
        },
    ]

    education = [
        {
            "degree": "Master's in Computer Science",
            "school": "University Name",
            "date_range": "2018 - 2020",
            "description": "Focused on distributed systems and machine learning.",
        },
    ]

    skills = [
        "Cloud Architecture",
        "AWS",
        "Kubernetes",
        "Python",
        "Leadership",
        "System Design",
        "CI/CD",
        "DevOps",
    ]

    certifications = [
        "AWS Certified Solutions Architect - Professional",
        "Google Cloud Professional Engineer",
    ]

    return {
        "basic_info": basic_info,
        "experience": experience,
        "education": education,
        "skills": skills,
        "certifications": certifications,
    }
96+
def format_date(date_str: str) -> str:
    """Convert a date or date range to DD/MM/YYYY format.

    Args:
        date_str: A single date (e.g. ``"Jan 2021"``) or a range separated
            by ``" - "`` (e.g. ``"January 2021 - Present"``). Accepted
            input formats: ``%b %Y``, ``%B %Y``, ``%m/%Y``, ``%Y``.

    Returns:
        The reformatted string (e.g. ``"01/01/2021 - Present"``). Parts
        matching no known format are kept unchanged; empty input yields "".
    """
    if not date_str:
        return ""

    result = []
    for part in date_str.split(" - "):
        # Strip stray whitespace so inputs like " Jan 2021 " still parse;
        # the unstripped original previously fell through to the fallback.
        part = part.strip()
        if part.lower() == "present":
            result.append("Present")
            continue
        for fmt in ("%b %Y", "%B %Y", "%m/%Y", "%Y"):
            try:
                # strptime defaults missing day/month components to 1.
                date_obj = datetime.strptime(part, fmt)
                result.append(date_obj.strftime("%d/%m/%Y"))
                break
            except ValueError:
                continue
        else:
            # No format matched, keep original text verbatim.
            result.append(part)

    return " - ".join(result)
123+
def update_cv_file(data: Dict[str, Any]) -> None:
    """Write the CV data file (``_data/cv.yml``) from LinkedIn profile data.

    Args:
        data: Payload shaped like the return value of
            ``fetch_linkedin_data()``; reads the ``basic_info``,
            ``experience``, ``education``, ``skills`` and
            ``certifications`` keys.
    """
    logger.info("Updating CV data file")

    cv_data = {
        "basics": {
            "name": data["basic_info"]["name"],
            "label": data["basic_info"]["headline"],
            "summary": data["basic_info"]["summary"],
            "location": data["basic_info"]["location"],
            "website": SITE_URL,
            "profiles": [
                {
                    "network": "LinkedIn",
                    "url": LINKEDIN_URL
                }
            ]
        },
        "work": [],
        "education": [],
        "skills": data["skills"],
        "certifications": data["certifications"]
    }

    # Add work experience
    for job in data["experience"]:
        cv_data["work"].append({
            "position": job["title"],
            "company": job["company"],
            "date": format_date(job["date_range"]),
            "summary": job["description"]
        })

    # Add education
    for edu in data["education"]:
        cv_data["education"].append({
            "degree": edu["degree"],
            "institution": edu["school"],
            "date": format_date(edu["date_range"]),
            "description": edu["description"]
        })

    # Ensure directory exists
    os.makedirs(os.path.dirname(CV_FILE_PATH), exist_ok=True)

    # Explicit UTF-8 and allow_unicode keep non-ASCII profile text (names,
    # locations) intact regardless of the platform's default encoding.
    with open(CV_FILE_PATH, 'w', encoding='utf-8') as file:
        yaml.dump(cv_data, file, default_flow_style=False, allow_unicode=True)
172+
def update_about_file(data: Dict[str, Any]) -> None:
    """Write the About page data file (``_data/about.yml``).

    Args:
        data: Payload shaped like the return value of
            ``fetch_linkedin_data()``. The most recent experience entry
            (index 0) is used as the current position; skills are capped
            at the first eight.
    """
    logger.info("Updating About page data file")

    experience = data["experience"]

    about_data = {
        "name": data["basic_info"]["name"],
        "tagline": data["basic_info"]["headline"],
        "summary": data["basic_info"]["summary"],
        "location": data["basic_info"]["location"],
        "linkedin_url": LINKEDIN_URL,
        "current_position": {
            "title": experience[0]["title"] if experience else "",
            "company": experience[0]["company"] if experience else ""
        },
        # A slice never raises, so no length check is needed: fewer than
        # eight skills simply yields all of them.
        "skills": data["skills"][:8]
    }

    # Ensure directory exists
    os.makedirs(os.path.dirname(ABOUT_FILE_PATH), exist_ok=True)

    # Explicit UTF-8 and allow_unicode keep non-ASCII profile text intact.
    with open(ABOUT_FILE_PATH, 'w', encoding='utf-8') as file:
        yaml.dump(about_data, file, default_flow_style=False, allow_unicode=True)
197+
def update_categories(data: Dict[str, Any]) -> None:
    """Write ``_data/categories.yml`` from LinkedIn skills and experience.

    Args:
        data: Payload shaped like the return value of
            ``fetch_linkedin_data()``; reads ``experience`` and ``skills``.
    """
    logger.info("Updating categories from LinkedIn data")

    categories = {
        "companies": [],
        "skills": []
    }

    # Collect unique, non-empty company names preserving first-seen order.
    for job in data["experience"]:
        company = job["company"].strip()
        if company and company not in categories["companies"]:
            categories["companies"].append(company)

    # Deduplicate and filter skills the same way companies are handled;
    # previously duplicates and whitespace-only entries slipped through.
    for skill in data["skills"]:
        skill = skill.strip()
        if skill and skill not in categories["skills"]:
            categories["skills"].append(skill)

    # Ensure directory exists
    os.makedirs(os.path.dirname(CATEGORIES_FILE_PATH), exist_ok=True)

    # Explicit UTF-8 and allow_unicode keep non-ASCII category names intact.
    with open(CATEGORIES_FILE_PATH, 'w', encoding='utf-8') as file:
        yaml.dump(categories, file, default_flow_style=False, allow_unicode=True)
223+
def update_linkedin_urls(data: Dict[str, Any]) -> None:
    """Normalize every LinkedIn profile URL in site sources to LINKEDIN_URL.

    Args:
        data: Accepted for signature consistency with the other update
            functions; not used here.

    Scans all ``*.md`` and ``*.html`` files (skipping ``_site`` build
    output and ``.git`` internals) and rewrites any variant of the profile
    URL — http/https, with/without ``www.``, trailing slash, or tracking
    query string — to the canonical form.
    """
    logger.info("Updating LinkedIn URLs in files")

    md_files = list(Path(".").glob("**/*.md")) + list(Path(".").glob("**/*.html"))

    # Bound the query-string tail at whitespace and common delimiters:
    # the previous `(?:\?.*)?` tail was greedy to end-of-line, so a URL in
    # e.g. href="...?trk=x" swallowed the closing quote and everything after.
    pattern = re.compile(
        r'https?://(?:www\.)?linkedin\.com/in/notawar/?(?:\?[^\s"\'<>)\]]*)?'
    )

    for file_path in md_files:
        if "_site" in str(file_path) or ".git" in str(file_path):
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()

            updated = pattern.sub(LINKEDIN_URL, content)

            # Rewrite only when something actually changed, to avoid
            # touching mtimes (and triggering rebuilds) on every run.
            if updated != content:
                with open(file_path, 'w', encoding='utf-8') as file:
                    file.write(updated)

        except Exception as e:
            logger.error(f"Error updating LinkedIn URL in {file_path}: {e}")
251+
def update_last_updated_file() -> None:
    """Record the time of the latest sync in the last-updated marker file."""
    marker = Path(LAST_UPDATED_FILE)
    # Create the parent directory on first run.
    marker.parent.mkdir(parents=True, exist_ok=True)
    marker.write_text(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
257+
def main() -> None:
    """Coordinate the LinkedIn data sync: fetch, back up, update data files.

    Exits the process with status 1 on any failure so a CI run is marked
    as failed.
    """
    logger.info("Starting LinkedIn profile sync")

    try:
        # Fetch LinkedIn data
        linkedin_data = fetch_linkedin_data()

        # Save raw data for backup/debugging
        os.makedirs(os.path.dirname(LINKEDIN_BACKUP_PATH), exist_ok=True)
        with open(LINKEDIN_BACKUP_PATH, 'w', encoding='utf-8') as f:
            json.dump(linkedin_data, f, indent=2)

        # Update the generated site data files
        update_cv_file(linkedin_data)
        update_about_file(linkedin_data)
        update_categories(linkedin_data)
        update_linkedin_urls(linkedin_data)
        update_last_updated_file()

        logger.info("LinkedIn profile sync completed successfully")

    except Exception as e:
        logger.error(f"Error during LinkedIn sync: {e}", exc_info=True)
        # SystemExit rather than exit(): the exit() helper is injected by
        # the `site` module and is not guaranteed to exist (e.g. under
        # `python -S` or in frozen/embedded interpreters).
        raise SystemExit(1)


if __name__ == "__main__":
    main()
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Daily workflow that syncs LinkedIn profile data into the repository.
name: LinkedIn Profile Sync

on:
  schedule:
    # Run daily at midnight (UTC)
    - cron: '0 0 * * *'
  # Allow manual triggering
  workflow_dispatch:

jobs:
  sync-linkedin:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests beautifulsoup4 python-dotenv pyyaml selenium webdriver-manager

      - name: Run LinkedIn scraper
        env:
          LINKEDIN_EMAIL: ${{ secrets.LINKEDIN_EMAIL }}
          LINKEDIN_PASSWORD: ${{ secrets.LINKEDIN_PASSWORD }}
        run: |
          python .github/scripts/linkedin_sync.py

      # Commit only when the scraper changed something, and record whether a
      # commit was actually made. (The previous separate check diffed
      # HEAD~1..HEAD *after* the conditional commit, so when nothing was
      # committed it inspected an unrelated earlier commit and could
      # misreport changes.)
      - name: Commit and push if changes
        id: commit
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "GitHub Action"
          git add .
          if git diff --staged --quiet; then
            echo "changes=false" >> $GITHUB_OUTPUT
          else
            git commit -m "Update content from LinkedIn profile"
            git push
            echo "changes=true" >> $GITHUB_OUTPUT
          fi

      - name: Notify about update (if applicable)
        if: steps.commit.outputs.changes == 'true'
        uses: actions/github-script@v6
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: 1, // Create an ongoing issue #1 for tracking updates
              body: `LinkedIn profile data was updated on ${new Date().toISOString().split('T')[0]}`
            });

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
# Jekyll build output and caches
_site/
.sass-cache/
.jekyll-cache/
.jekyll-metadata
# Bundler / Ruby dependencies
.bundle/
vendor/
Gemfile.lock
# OS and editor cruft
.DS_Store
*~

.markdownlint.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"default": true,
3+
"MD013": { "line_length": 120 },
4+
"MD033": false,
5+
"MD041": false,
6+
"MD046": {
7+
"style": "fenced"
8+
}
9+
}

0 commit comments

Comments
 (0)