ai_hackathon/funcs.py at main · Masterlincs/ai_hackathon · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import requests
import xml.etree.ElementTree as ET
import random
import time
import streamlit as st

class ArxivPaper:
    """Metadata for one arXiv paper, loaded from the arXiv export API.

    After construction ``title`` and ``summary`` are ``None`` and ``authors``
    is an empty list; ``fetch_details()`` fills them in.
    """

    # Endpoint queried by fetch_details().
    _API_URL = "http://export.arxiv.org/api/query"
    # Namespace prefix carried by every Atom element in the response feed.
    _ATOM = "{http://www.w3.org/2005/Atom}"

    def __init__(self, arxiv_id):
        self.arxiv_id = arxiv_id
        self.title = None
        self.authors = []
        self.summary = None

    def fetch_details(self, timeout=10):
        """Query the arXiv API for this paper and store its metadata.

        Args:
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            True if the response was HTTP 200 and contained an Atom entry,
            False otherwise.

        Raises:
            requests.RequestException: If the request itself fails
                (connection error, timeout, ...).
        """
        response = requests.get(
            self._API_URL,
            params={"id_list": self.arxiv_id},
            timeout=timeout,
        )
        if response.status_code != 200:
            return False
        return self._parse_feed(response.content)

    def _parse_feed(self, content):
        """Populate the paper from an Atom feed document.

        On success ``title``, ``summary`` and ``authors`` are replaced with
        the values found in the first entry, so calling this repeatedly never
        accumulates duplicate authors. On failure the instance is left
        untouched.

        Args:
            content: The raw XML feed (bytes or str).

        Returns:
            True if the feed contained an entry, False if it had none or was
            not well-formed XML.
        """
        try:
            root = ET.fromstring(content)
        except ET.ParseError:
            return False

        entry = root.find(f"{self._ATOM}entry")
        if entry is None:
            return False

        self.title = self._stripped_text(entry.find(f"{self._ATOM}title"))
        self.summary = self._stripped_text(entry.find(f"{self._ATOM}summary"))

        author_names = (
            self._stripped_text(author.find(f"{self._ATOM}name"))
            for author in entry.findall(f"{self._ATOM}author")
        )
        # Authors without a usable <name> element are skipped.
        self.authors = [name for name in author_names if name is not None]
        return True

    @staticmethod
    def _stripped_text(element):
        """Return the element's text without surrounding whitespace.

        Returns None when the element is missing or has no text (for example
        an empty ``<summary/>``), instead of failing on ``None.strip()``.
        """
        if element is None or element.text is None:
            return None
        return element.text.strip()

    def display_details(self):
        """Print the title, authors and summary to standard output."""
        print(f"Title: {self.title}")
        print("Authors: ", ", ".join(self.authors))
        print(f"Summary: {self.summary}")


def summarise_blurb(blurb, api_key, max_retries=3, retry_delay=5, timeout=30):
    """Summarise ``blurb`` with the hosted facebook/bart-large-cnn model.

    The request is attempted up to ``max_retries`` times. Rate-limit
    responses (HTTP 429) and request-level failures (including other HTTP
    error statuses) are retried after ``retry_delay`` seconds.

    Args:
        blurb: Text to summarise.
        api_key: Hugging Face API token sent as a Bearer credential.
        max_retries: Maximum number of attempts.
        retry_delay: Seconds to wait between attempts.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        The ``summary_text`` of the first result, or a fixed failure message
        when every attempt was rate-limited (or ``max_retries`` is 0).

    Raises:
        ValueError: If the API key is rejected (HTTP 401) or the response
            body does not have the expected shape.
        requests.RequestException: If the final attempt fails with a
            request or HTTP error.
    """
    API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    headers = {"Authorization": f"Bearer {api_key}"}
    payload = {"inputs": blurb, "parameters": {"max_length": 500, "min_length": 50}}

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            response = requests.post(
                API_URL, headers=headers, json=payload, timeout=timeout
            )

            if response.status_code == 401:
                # A bad key will not fix itself; fail immediately (ValueError
                # is deliberately not caught by the handler below).
                raise ValueError("Invalid Hugging Face API key.")
            if response.status_code == 429:
                # No point in waiting when there is no attempt left.
                if not is_last_attempt:
                    print(f"Rate limit hit. Waiting {retry_delay} seconds...")
                    time.sleep(retry_delay)
                continue
            if response.status_code >= 400:
                raise requests.HTTPError(f"HTTP {response.status_code}: {response.text}")

            result = response.json()
        except requests.RequestException:
            if is_last_attempt:
                raise
            print(f"Attempt {attempt + 1} failed. Retrying in {retry_delay} seconds...")
            time.sleep(retry_delay)
            continue

        # Validate the shape explicitly so a malformed payload yields the
        # documented ValueError rather than a stray KeyError/TypeError.
        if (
            isinstance(result, list)
            and result
            and isinstance(result[0], dict)
            and "summary_text" in result[0]
        ):
            return result[0]["summary_text"]
        raise ValueError("Unexpected response format")

    return "Failed to summarize the blurb after multiple attempts."


def write_new_blurb(blurb_summary, api_key, timeout=60):
    """Generate a new blurb from a summary with Mistral-7B-Instruct-v0.2.

    Args:
        blurb_summary: Summary text used as the model input.
        api_key: Hugging Face API token sent as a Bearer credential.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The ``generated_text`` of the first result.

    Raises:
        ValueError: If the API key is rejected (HTTP 401) or the response
            body does not have the expected shape.
        requests.RequestException: If the request fails or the server
            answers with another HTTP error status.
    """
    API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
    headers = {"Authorization": f"Bearer {api_key}"}
    summary_length = len(blurb_summary)
    payload = {
        "inputs": blurb_summary,
        "parameters": {
            "max_length": summary_length + 150,
            "num_beams": 5,
            # Summaries shorter than 50 characters would otherwise produce a
            # negative minimum length.
            "min_length": max(0, summary_length - 50),
        },
    }
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)

    # Same error reporting as summarise_blurb: surface the HTTP failure
    # instead of an opaque KeyError from indexing an error object.
    if response.status_code == 401:
        raise ValueError("Invalid Hugging Face API key.")
    if response.status_code >= 400:
        raise requests.HTTPError(f"HTTP {response.status_code}: {response.text}")

    result = response.json()
    if (
        isinstance(result, list)
        and result
        and isinstance(result[0], dict)
        and "generated_text" in result[0]
    ):
        return result[0]["generated_text"]
    raise ValueError("Unexpected response format")

def compare_blurbs(blurb, ai_blurb, api_key):
    """Send both blurbs to the all-MiniLM-L6-v2 sentence-similarity endpoint.

    ``blurb`` is submitted as the source sentence and ``ai_blurb`` as the
    single sentence to compare against it.

    Returns:
        The decoded JSON body of the response, unmodified (presumably the
        similarity score(s); verify against the model's API documentation).
    """
    endpoint = "https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2"
    comparison = {"source_sentence": blurb, "sentences": [ai_blurb]}
    reply = requests.post(
        endpoint,
        headers={"Authorization": f"Bearer {api_key}"},
        json={"inputs": comparison},
    )
    return reply.json()

def is_valid_api_key(api_key: str, timeout: float = 10) -> bool:
    """Check a Hugging Face API key against the ``whoami-v2`` endpoint.

    Args:
        api_key: Token to validate.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        True if the endpoint answers with HTTP 200, False otherwise. An
        empty, blank or missing key is rejected without a network call.

    Raises:
        requests.RequestException: If the request itself fails.
    """
    # Nothing to validate; avoid a pointless round-trip.
    if not api_key or not api_key.strip():
        return False

    response = requests.get(
        "https://huggingface.co/api/whoami-v2",
        headers={"Authorization": f"Bearer {api_key}"},
        timeout=timeout,
    )
    return response.status_code == 200


# arXiv IDs that fetch_random_valid_paper_details() has already tried; the
# set lives at module level so it is shared by every call.
previous_ids = set()

def generate_random_arxiv_id():
    """Return a random, well-formed arXiv identifier.

    The identifier has the form ``YYMM.number`` with a two-digit year from
    10 to 24, a two-digit month, and a paper number between 1 and 9999. The
    number is zero-padded to four digits for years before 2015 and to five
    digits from 2015 on, matching arXiv's identifier scheme. The ID is not
    guaranteed to belong to an existing paper.

    The global random generator is used as-is; it is not reseeded here, so
    a caller-supplied seed is respected.
    """
    year = random.randint(10, 24)  # last two digits of the year
    month = random.randint(1, 12)
    paper_number = random.randint(1, 9999)

    # arXiv widened the sequence number from 4 to 5 digits in January 2015.
    number_width = 4 if year < 15 else 5
    return f"{year}{month:02d}.{paper_number:0{number_width}d}"

@st.cache_data
def fetch_random_valid_paper_details():
    """Keep drawing random arXiv IDs until one can be fetched, then return it.

    Each candidate not seen before is recorded in ``previous_ids`` and looked
    up through ``ArxivPaper.fetch_details()``. The first candidate whose
    lookup succeeds has its details printed and its ID returned. Candidates
    that were already tried, or whose lookup fails, are reported on standard
    output and another ID is drawn.

    The function is wrapped in ``st.cache_data``, so Streamlit may serve a
    previously returned ID instead of running the search again.
    """
    # iter() with a sentinel that is never produced yields candidates forever.
    for candidate_id in iter(generate_random_arxiv_id, None):
        if candidate_id not in previous_ids:
            previous_ids.add(candidate_id)
            candidate = ArxivPaper(candidate_id)
            if candidate.fetch_details():
                candidate.display_details()
                return candidate_id
        print(f"Invalid arXiv ID: {candidate_id}, retrying...")