-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfuncs.py
More file actions
139 lines (114 loc) · 5.11 KB
/
funcs.py
File metadata and controls
139 lines (114 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import requests
import xml.etree.ElementTree as ET
import random
import time
import streamlit as st
class ArxivPaper:
    """Metadata for one arXiv paper, loaded from the arXiv export API.

    ``title`` and ``summary`` are ``None`` and ``authors`` is empty until
    :meth:`fetch_details` succeeds.
    """

    # Endpoint queried by fetch_details().
    API_URL = "http://export.arxiv.org/api/query"
    # The response is an Atom feed, so every tag is namespace-qualified.
    _ATOM = "{http://www.w3.org/2005/Atom}"

    def __init__(self, arxiv_id):
        """Remember *arxiv_id*; no network access happens here."""
        self.arxiv_id = arxiv_id
        self.title = None
        self.authors = []
        self.summary = None

    def fetch_details(self, timeout=30):
        """Query the arXiv API for this paper and populate its attributes.

        Args:
            timeout: Seconds to wait for the HTTP response, so a stalled
                connection cannot hang the caller forever.

        Returns:
            True when the response contained an entry for the paper, False
            when the status was not 200, the body was not parseable XML, or
            the feed held no entry.

        Raises:
            requests.RequestException: On connection-level failures.
        """
        response = requests.get(
            self.API_URL,
            params={"id_list": self.arxiv_id},
            timeout=timeout,
        )
        if response.status_code != 200:
            return False
        return self._load_feed(response.content)

    def _load_feed(self, feed_xml):
        """Fill title, summary and authors from an Atom feed document.

        Previously loaded values are replaced, so calling this repeatedly
        does not accumulate duplicate authors.
        """
        try:
            root = ET.fromstring(feed_xml)
        except ET.ParseError:
            return False

        entry = root.find(f"{self._ATOM}entry")
        if entry is None:
            return False

        self.title = self._clean_text(entry.find(f"{self._ATOM}title"))
        self.summary = self._clean_text(entry.find(f"{self._ATOM}summary"))
        names = (
            self._clean_text(author.find(f"{self._ATOM}name"))
            for author in entry.findall(f"{self._ATOM}author")
        )
        self.authors = [name for name in names if name is not None]
        return True

    @staticmethod
    def _clean_text(element):
        """Return the stripped text of *element*, or None if it has none."""
        # An empty tag such as <title/> has text None; guard before strip().
        if element is None or element.text is None:
            return None
        return element.text.strip()

    def display_details(self):
        """Print the title, authors and summary to standard output."""
        print(f"Title: {self.title}")
        print("Authors: ", ", ".join(self.authors))
        print(f"Summary: {self.summary}")
def summarise_blurb(blurb, api_key, max_retries=3, timeout=30, retry_delay=5):
    """Summarise *blurb* with the hosted ``facebook/bart-large-cnn`` model.

    Args:
        blurb: Text to summarise.
        api_key: Hugging Face token, sent as a Bearer credential.
        max_retries: Total number of attempts to make.
        timeout: Seconds to wait for each HTTP response.
        retry_delay: Seconds to pause before the next attempt.

    Returns:
        The ``summary_text`` of the first result, or a fixed failure message
        when the attempts ran out while rate limited (or ``max_retries`` is 0).

    Raises:
        ValueError: On HTTP 401, or when the response body does not have the
            expected ``[{"summary_text": ...}]`` shape.
        requests.HTTPError: On any other 4xx status immediately, or on a 5xx
            status once the attempts are exhausted.
        requests.RequestException: When the final attempt fails at the
            connection level.
    """
    api_url = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"inputs": blurb, "parameters": {"max_length": 500, "min_length": 50}}

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1

        try:
            response = requests.post(
                api_url, headers=headers, json=payload, timeout=timeout
            )
        except requests.RequestException:
            if is_last_attempt:
                raise
            print(f"Attempt {attempt + 1} failed. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            continue

        status = response.status_code
        if status == 401:
            raise ValueError("Invalid Hugging Face API key.")

        if status == 429:
            # Sleeping after the final attempt would only delay the failure.
            if not is_last_attempt:
                print(f"Rate limit hit. Waiting {retry_delay} seconds...")
                time.sleep(retry_delay)
            continue

        if status >= 400:
            # Other 4xx responses are client errors that a retry cannot fix;
            # only server-side (5xx) failures are worth another attempt.
            if status < 500 or is_last_attempt:
                raise requests.HTTPError(f"HTTP {status}: {response.text}")
            print(f"Attempt {attempt + 1} failed. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            continue

        try:
            result = response.json()
        except ValueError as exc:
            raise ValueError("Unexpected response format") from exc

        if isinstance(result, list) and result:
            first = result[0]
            if isinstance(first, dict) and "summary_text" in first:
                return first["summary_text"]
        raise ValueError("Unexpected response format")

    return "Failed to summarize the blurb after multiple attempts."
def write_new_blurb(blurb_summary, api_key, timeout=60):
    """Generate a new blurb from *blurb_summary* with Mistral-7B-Instruct.

    Args:
        blurb_summary: Prompt text sent as the model input.
        api_key: Hugging Face token, sent as a Bearer credential.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The ``generated_text`` of the first result.

    Raises:
        ValueError: On HTTP 401, or when the response body does not have the
            expected ``[{"generated_text": ...}]`` shape.
        requests.HTTPError: On any other 4xx/5xx status.
        requests.RequestException: On connection-level failures.
    """
    api_url = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
    headers = {"Authorization": f"Bearer {api_key}"}
    summary_length = len(blurb_summary)
    payload = {
        "inputs": blurb_summary,
        "parameters": {
            "max_length": summary_length + 150,
            "num_beams": 5,
            # A summary under 50 characters would otherwise yield a negative
            # minimum length.
            "min_length": max(0, summary_length - 50),
        },
    }

    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)

    # Surface failures explicitly instead of indexing into an error payload.
    if response.status_code == 401:
        raise ValueError("Invalid Hugging Face API key.")
    if response.status_code >= 400:
        raise requests.HTTPError(f"HTTP {response.status_code}: {response.text}")

    try:
        result = response.json()
    except ValueError as exc:
        raise ValueError("Unexpected response format") from exc

    if isinstance(result, list) and result:
        first = result[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
    raise ValueError("Unexpected response format")
def compare_blurbs(blurb, ai_blurb, api_key, timeout=30):
    """Ask the ``all-MiniLM-L6-v2`` endpoint to compare two blurbs.

    Args:
        blurb: Text sent as the ``source_sentence``.
        ai_blurb: Text sent as the single entry of ``sentences``.
        api_key: Hugging Face token, sent as a Bearer credential.
        timeout: Seconds to wait for the HTTP response, so a stalled
            connection cannot block the caller indefinitely.

    Returns:
        The decoded JSON body of the response, unmodified. The status code is
        not inspected, so an error payload is returned as-is.

    Raises:
        requests.RequestException: On connection-level failures or when the
            body is not valid JSON (requests >= 2.27; older versions raise a
            plain ``ValueError``).
    """
    api_url = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {
        "inputs": {
            "source_sentence": blurb,
            "sentences": [ai_blurb],
        }
    }
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    return response.json()
def is_valid_api_key(api_key: str, timeout: float = 10) -> bool:
    """Return True when Hugging Face accepts *api_key*.

    The key is checked by calling the ``whoami-v2`` endpoint; only an HTTP
    200 response counts as valid.

    Args:
        api_key: Token to verify.
        timeout: Seconds to wait for the HTTP response.

    Raises:
        requests.RequestException: On connection-level failures.
    """
    # A missing or blank key can never authenticate; skip the round-trip.
    if not api_key or not api_key.strip():
        return False

    headers = {"Authorization": f"Bearer {api_key}"}
    response = requests.get(
        "https://huggingface.co/api/whoami-v2",
        headers=headers,
        timeout=timeout,
    )
    return response.status_code == 200
# arXiv IDs that have already been tried. fetch_random_valid_paper_details()
# checks this set and adds each new candidate to it, so an ID is not looked
# up twice within the lifetime of this module.
previous_ids = set()
def generate_random_arxiv_id():
    """Return a random, correctly formatted arXiv identifier.

    The result is ``YYMM.NNNN`` for years 10-14 and ``YYMM.NNNNN`` for years
    15-24, with the sequence number drawn from 1-9999 and zero-padded. The
    identifier is only well formed; no paper is guaranteed to exist under it.

    The global ``random`` generator is used as-is rather than being reseeded
    on every call, so callers may seed it themselves for reproducible output.
    """
    year = random.randint(10, 24)  # last two digits of the year
    month = random.randint(1, 12)
    paper_number = random.randint(1, 9999)
    # arXiv widened the sequence number from 4 to 5 digits starting with 1501.
    width = 5 if year >= 15 else 4
    return f"{year}{month:02d}.{paper_number:0{width}d}"
@st.cache_data
def fetch_random_valid_paper_details():
    """Return a random arXiv ID whose details could be fetched.

    Candidates come from generate_random_arxiv_id(). Each unseen candidate is
    recorded in previous_ids and looked up through ArxivPaper; the first one
    whose fetch succeeds has its details printed and its ID returned. The
    loop has no upper bound on the number of attempts.

    NOTE(review): the function is wrapped in st.cache_data, so repeated calls
    presumably return the cached ID instead of a fresh one - confirm that
    this is intended.
    """
    while True:
        random_arxiv_id = generate_random_arxiv_id()

        # Skip IDs that were already tried earlier.
        if random_arxiv_id in previous_ids:
            continue
        previous_ids.add(random_arxiv_id)

        candidate = ArxivPaper(random_arxiv_id)
        if not candidate.fetch_details():
            print(f"Invalid arXiv ID: {random_arxiv_id}, retrying...")
            continue

        candidate.display_details()
        return random_arxiv_id