Commit 1b37618

Author: Jeremy Dai
Message: update (#12)
Parent: c4afc1c

File tree: 2 files changed, +59 -23 lines

  .gitignore
  scripts/get_manifest.py


.gitignore

Lines changed: 4 additions & 1 deletion
@@ -70,4 +70,7 @@ _site/
 _dev/
 
 # macos system files
-.DS_Store
+.DS_Store
+
+# folder
+local/
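The new local/ entry keeps the readme.md that scripts/get_manifest.py (below) now downloads into local/ out of version control.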

scripts/get_manifest.py

Lines changed: 55 additions & 22 deletions
@@ -1,33 +1,66 @@
 import sys
 import requests
-import json
-import openai
+import os
 
 
-def scrape_url(url):
+def is_valid_markdown(content):
+    # Basic check for valid Markdown (customize as needed)
+    return bool(content.strip()) and content != 'Not Found'
+
+
+def fetch_readme(url):
+    # Convert GitHub blob URL to raw URL and append /README.md
+    if 'blob' in url:
+        raw_url = url.replace('blob', 'raw') + '/README.md'
+    else:
+        raw_url = url + '/raw/main/README.md'
+
     try:
-        response = requests.get(url, timeout=10)
+        # Try fetching from the raw URL
+        print(f"Fetching README from: {raw_url}")
+        response = requests.get(raw_url, timeout=10)
         response.raise_for_status()
-        content = response.json()
-
-        # for testing
-        with open('scraped_content.txt', 'w', encoding='utf-8') as f:
-            f.write(content)
-        print(f"Scraped content from {url}")
-    except Exception as e:
-        print(f"Error scraping {url}: {e}")
+
+        # Check if content is valid Markdown
+        if not is_valid_markdown(response.text):
+            # Fallback to /master/ if /main/ fails
+            raw_url = raw_url.replace('/main/', '/master/')
+            print(f"Retrying with: {raw_url}")
+            response = requests.get(raw_url, timeout=10)
+            response.raise_for_status()
+
+        if not is_valid_markdown(response.text):
+            print(
+                f"Error: Fetched content from {raw_url} is not valid Markdown")
+            sys.exit(1)
+
+        # Ensure the output directory exists
+        # Change to desired folder (e.g., 'downloads' or '')
+        output_dir = 'local'
+        if output_dir and not os.path.exists(output_dir):
+            os.makedirs(output_dir)
+
+        # Save the README content in the local folder
+        output_file = os.path.join(
+            output_dir, 'readme.md') if output_dir else 'readme.md'
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(response.text)
+        print(f"Saved README to {output_file}")
+
+    except requests.RequestException as e:
+        print(f"Error fetching README from {raw_url}: {e}")
         sys.exit(1)
 
 
 if __name__ == "__main__":
-    issue_body = sys.argv[1]
-    # Extract URL from issue body (assumes link is on its own line)
-    # TODO: need a more robust way to extract the URL
-    for line in issue_body.split('\n'):
-        line = line.strip()
-        if line.startswith('http://') or line.startswith('https://'):
-            scrape_url(line)
-            break
-    else:
-        print("No valid URL found in issue body")
+    if len(sys.argv) < 2:
+        print("No URL provided")
         sys.exit(1)
+
+    url = sys.argv[1].strip()
+    # Basic URL validation
+    if not (url.startswith('http://') or url.startswith('https://')):
+        print("Invalid URL provided")
+        sys.exit(1)
+
+    fetch_readme(url)
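For reference, a minimal standalone sketch of the URL rewriting that the new fetch_readme() performs; the owner/repo names below are hypothetical placeholders, not from this commit:

    # Sketch of fetch_readme()'s URL rewriting (owner/repo are hypothetical).
    def to_raw_readme_url(url):
        if 'blob' in url:
            # e.g. .../blob/main becomes .../raw/main, then /README.md is appended
            return url.replace('blob', 'raw') + '/README.md'
        # Bare repository URL: assume the default branch is 'main'
        return url + '/raw/main/README.md'

    print(to_raw_readme_url('https://github.com/owner/repo'))
    # https://github.com/owner/repo/raw/main/README.md
    print(to_raw_readme_url('https://github.com/owner/repo/blob/main'))
    # https://github.com/owner/repo/raw/main/README.md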

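The is_valid_markdown() check is what triggers the /main/ to /master/ fallback. A small demonstration with assumed inputs (the 'Not Found' body is the case the comparison appears to guard against):

    # Assumed inputs exercising is_valid_markdown() from the diff above.
    def is_valid_markdown(content):
        return bool(content.strip()) and content != 'Not Found'

    print(is_valid_markdown('# Project\n\nUsage ...'))  # True: real README text
    print(is_valid_markdown(''))                        # False: empty body
    print(is_valid_markdown('Not Found'))               # False: missing-file body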
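Assuming the workflow now passes the URL directly (the old version received the whole issue body in sys.argv[1]), a hypothetical invocation looks like this; the repository URL is a placeholder:

    python scripts/get_manifest.py https://github.com/owner/repo

On success the script writes local/readme.md, matching the local/ entry added to .gitignore above.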