@@ -13,6 +13,11 @@ def extract_pdf_info(text):
1313 pattern = r'\|.*?\|\s*🔥*\[([^\]]+)\].*?\[\[(.*?pdf.*?)\]\]\((https://[^\)]+)\)'
1414 matches = re .findall (pattern , text )
1515 info_list = []
16+ # Add default pdf: https://github.com/xlite-dev/Awesome-LLM-Inference/releases/download/v0.3/Awesome-LLM-Inference-v0.3.pdf.zip
17+ info_list .append ((
18+ "Awesome-LLM-Inference-v0.3" ,
19+ "https://github.com/xlite-dev/Awesome-LLM-Inference/releases/download/v0.3/Awesome-LLM-Inference-v0.3.pdf.zip"
20+ ))
1621 for title , _ , link in matches :
1722 # Remove special characters from the paper title to avoid illegal file names.
1823 valid_title = re .sub (r'[\\/*?:"<>|]' , '' , title )
@@ -34,8 +39,17 @@ def download_file(title, url):
3439 download_dir = 'downloaded_pdfs'
3540 if not os .path .exists (download_dir ):
3641 os .makedirs (download_dir )
37-
38- file_name = os .path .join (download_dir , f"{ title } .pdf" )
42+ # Use the paper title, as given, as the file name.
43+ # The extension follows the URL: .zip for zip archives, .pdf otherwise.
44+ if not url .endswith ('.zip' ):
45+ file_name = os .path .join (download_dir , f"{ title } .pdf" )
46+ else :
47+ file_name = os .path .join (download_dir , f"{ title } .zip" )
48+ # Check if the file already exists.
49+ if os .path .exists (file_name ):
50+ print (f"File { file_name } already exists. Skipping download." )
51+ return True
52+ # Get the total size of the file from the response headers.
3953 total_size = int (response .headers .get ('content-length' , 0 ))
4054
4155 print (f"Downloading paper { title } , file name is { file_name } ..." )
@@ -45,6 +59,8 @@ def download_file(title, url):
4559 unit = 'B' ,
4660 unit_scale = True ,
4761 unit_divisor = 1024 ,
62+ colour = "yellow" ,
63+ ascii = True ,
4864 ) as bar :
4965 for data in response .iter_content (chunk_size = 1024 ):
5066 size = file .write (data )
@@ -64,7 +80,7 @@ def main():
6480 pdf_info = extract_pdf_info (text )
6581 print (f"A total of { len (pdf_info )} PDF links were matched." )
6682
67- for title , link in pdf_info :
83+ for title , link in tqdm ( pdf_info , colour = "blue" ) :
6884 download_file (title , link )
6985 except FileNotFoundError :
7086 print ("README.md file not found. Please ensure the file exists in the current working directory." )
0 commit comments