-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain_replit_full_articles.py
More file actions
133 lines (110 loc) · 3.98 KB
/
main_replit_full_articles.py
File metadata and controls
133 lines (110 loc) · 3.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import os
import sys
import time
from Bio import Entrez
from Bio import Medline
import requests
# Contact address that Biopython attaches to the Entrez (NCBI E-utilities)
# requests made by the functions below. NCBI asks clients to identify
# themselves this way, so replace the placeholder before running the script.
Entrez.email = "your_email@example.com" # Replace with your email
def search_pubmed(query, max_results=10):
    """
    Search PubMed for articles matching the given query.

    Args:
        query: PubMed search expression passed to ESearch as ``term``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        The ``IdList`` entry of the parsed ESearch result. An empty list is
        returned when the query is blank or when the request or parsing
        fails (the error is printed, not raised).
    """
    # A blank term can only produce an error from the service; skip the
    # network round-trip entirely.
    if not query or not str(query).strip():
        return []
    try:
        handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results)
        try:
            results = Entrez.read(handle)
        finally:
            # Release the connection even when parsing raises.
            handle.close()
        return results["IdList"]
    except Exception as e:
        # Best-effort boundary: callers treat "no IDs" and "lookup failed"
        # the same way, so report and fall back to an empty result.
        print(f"Error searching PubMed: {e}")
        return []
def fetch_pubmed_details(id_list):
    """
    Fetch MEDLINE records for the given PubMed IDs.

    Args:
        id_list: Iterable of PubMed IDs; each is converted with ``str`` and
            the IDs are sent comma-separated in a single EFetch request.

    Returns:
        A list of the records yielded by ``Medline.parse``. An empty list is
        returned when ``id_list`` is empty or when the request or parsing
        fails (the error is printed, not raised).
    """
    # Nothing to look up: avoid sending a request with an empty ``id``.
    if not id_list:
        return []
    try:
        ids = ",".join(str(pmid) for pmid in id_list)
        handle = Entrez.efetch(db="pubmed", id=ids, rettype="medline", retmode="text")
        try:
            # Medline.parse reads lazily from the handle, so the records must
            # be materialised before the handle is closed.
            return list(Medline.parse(handle))
        finally:
            handle.close()
    except Exception as e:
        # Best-effort boundary: report and fall back to an empty result.
        print(f"Error fetching PubMed details: {e}")
        return []
def check_pmc_availability(pmid):
    """
    Check whether a PubMed article has a linked PubMed Central record.

    Args:
        pmid: PubMed ID to look up through ELink (``pubmed`` -> ``pmc``,
            link name ``pubmed_pmc``).

    Returns:
        The ``Id`` of the first link in the first ``LinkSetDb`` entry, or
        ``None`` when ``pmid`` is empty, no link exists, or the request or
        parsing fails (the error is printed, not raised).
    """
    # Without an ID there is nothing to link; skip the network call.
    if not pmid:
        return None
    try:
        handle = Entrez.elink(dbfrom="pubmed", db="pmc", linkname="pubmed_pmc", id=pmid)
        try:
            result = Entrez.read(handle)
        finally:
            # Release the connection even when parsing raises.
            handle.close()
        link_sets = result[0]["LinkSetDb"]
        if link_sets:
            return link_sets[0]["Link"][0]["Id"]
        return None
    except Exception as e:
        # Best-effort boundary: a failed lookup is reported and treated as
        # "not available".
        print(f"Error checking PMC availability: {e}")
        return None
def download_pmc_article(pmcid, output_dir, timeout=30):
    """
    Download an article PDF from PubMed Central.

    Args:
        pmcid: PMC identifier without the ``PMC`` prefix; it is appended to
            the article base URL to build the PDF address.
        output_dir: Directory that receives ``PMC<pmcid>.pdf``; created if
            it does not exist.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the caller indefinitely.

    Returns:
        The path of the written file, or ``None`` when ``pmcid`` is empty,
        the HTTP request fails, the response does not look like a PDF, or
        the file cannot be written. Failures are printed, not raised.
    """
    if not pmcid:
        print("Error downloading article: missing PMC ID")
        return None

    base_url = "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC"
    pdf_url = f"{base_url}{pmcid}/pdf/main.pdf"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    try:
        response = requests.get(pdf_url, headers=headers, timeout=timeout)
        response.raise_for_status()
        content = response.content
    except requests.RequestException as e:
        print(f"Error downloading article: {e}")
        return None

    # A 2xx status does not guarantee a PDF body (an HTML landing or error
    # page is possible). Look for the PDF signature near the start instead
    # of saving arbitrary content under a .pdf name.
    if b"%PDF" not in content[:1024]:
        print(f"Error downloading article: response for PMC{pmcid} is not a PDF")
        return None

    filepath = os.path.join(output_dir, f"PMC{pmcid}.pdf")
    try:
        os.makedirs(output_dir, exist_ok=True)
        with open(filepath, 'wb') as f:
            f.write(content)
    except OSError as e:
        # Keep the "None on failure" contract for disk errors as well.
        print(f"Error saving article: {e}")
        return None

    print(f"Article downloaded: {filepath}")
    return filepath
def _parse_max_results(raw, default=10):
    """Turn the user's answer into a positive result limit, else *default*."""
    text = raw.strip()
    if not text:
        return default
    try:
        value = int(text)
    except ValueError:
        print(f"Invalid number {raw!r}; using {default}.")
        return default
    if value < 1:
        print(f"Result limit must be positive; using {default}.")
        return default
    return value


def main():
    """
    Run the interactive search-and-download workflow.

    Prompts for a query and a result limit, lists the matching PubMed
    records, checks each one for a PubMed Central link, and downloads the
    linked articles into the ``downloaded_articles`` directory.
    """
    query = input("Enter your PubMed search query: ")
    max_results = _parse_max_results(
        input("Enter the maximum number of results (default 10): ")
    )

    print(f"Searching PubMed for: {query}")
    id_list = search_pubmed(query, max_results)
    if not id_list:
        print("No results found.")
        return
    print(f"Found {len(id_list)} results.")

    records = fetch_pubmed_details(id_list)
    available_pmc = []
    for record in records:
        pmid = record.get("PMID")
        title = record.get("TI", "No title")
        print(f"PMID: {pmid or 'N/A'}")
        print(f"Title: {title}")
        # Only query PMC when the record actually carries a PMID; a
        # placeholder value would just produce a failed lookup.
        pmcid = check_pmc_availability(pmid) if pmid else None
        if pmcid:
            print("Available in PMC")
            available_pmc.append((pmid, pmcid, title))
        else:
            print("Not available in PMC")
        print("-" * 50)

    if not available_pmc:
        print("No articles available in PubMed Central.")
        return

    print("\nArticles available in PubMed Central:")
    for i, (pmid, pmcid, title) in enumerate(available_pmc, 1):
        print(f"{i}. PMID: {pmid}, PMCID: {pmcid}")
        print(f" Title: {title}")

    # The target directory is the same for every article: create it once.
    output_dir = "downloaded_articles"
    os.makedirs(output_dir, exist_ok=True)

    succeeded = 0
    for _pmid, pmcid, title in available_pmc:
        print(f"Downloading article: {title}")
        downloaded_file = download_pmc_article(pmcid, output_dir)
        if downloaded_file:
            succeeded += 1
            print(f"Article successfully downloaded to {downloaded_file}")
        else:
            print(f"Failed to download the article: {title}")

    # Report the real outcome rather than claiming success unconditionally.
    if succeeded == len(available_pmc):
        print("All articles downloaded.")
    else:
        print(f"Downloaded {succeeded} of {len(available_pmc)} articles.")
# Start the interactive workflow only when this file is executed as a script,
# so importing the module does not prompt for input.
if __name__ == "__main__":
    main()