From bc3334e4d84d00b72ad52303dc144c1cec595917 Mon Sep 17 00:00:00 2001
From: Mugundh J B
Date: Wed, 9 Oct 2024 22:10:06 +0530
Subject: [PATCH] Added Research Profile Summarizer

---
 .../Research-Profile-Summarizer/README.md     |  92 +++++++++++++++
 .../Research-Profile-Summarizer/app.py        | 109 ++++++++++++++++++
 .../requirements.txt                          |   5 +
 3 files changed, 206 insertions(+)
 create mode 100644 Algorithms and Deep Learning Models/Research-Profile-Summarizer/README.md
 create mode 100644 Algorithms and Deep Learning Models/Research-Profile-Summarizer/app.py
 create mode 100644 Algorithms and Deep Learning Models/Research-Profile-Summarizer/requirements.txt

diff --git a/Algorithms and Deep Learning Models/Research-Profile-Summarizer/README.md b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/README.md
new file mode 100644
index 000000000..2dff99f08
--- /dev/null
+++ b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/README.md
@@ -0,0 +1,92 @@
+# **Research Profile Summarizer**
+
+### 🎯 **Goal**
+
+The primary goal of **Research Profile Summarizer** is to provide a comprehensive tool for researchers to gather, summarize, and analyze academic profiles. The app retrieves key information about authors, their research interests, citations, and top publications, enhancing accessibility and streamlining the research process.
+
+### 🧵 **Dataset**
+
+**Research Profile Summarizer** does not rely on a pre-existing dataset. Instead, it utilizes live data retrieved from academic databases through the **scholarly** library, allowing users to access real-time information on various authors and their works.
+
+### 🧾 **Description**
+
+**Research Profile Summarizer** enables users to input author names, retrieve their academic profiles, and generate summaries using automated text processing and generative AI. The application is designed for seamless interaction, providing users with concise and informative outputs, making academic research more efficient and accessible.
+
+### 🧮 **What I have done!**
+
+- Integrated a **data retrieval system** using the **scholarly** library to gather information on authors.
+- Utilized **Google Generative AI** for generating concise summaries of author profiles.
+- Deployed the application using **Streamlit** to create a user-friendly web interface for interaction.
+
+### 🚀 **Models Implemented**
+
+- **Google Generative AI**: Chosen for its advanced natural language understanding and high accuracy in generating meaningful summaries based on the retrieved data.
+
+### 📚 **Libraries Needed**
+
+- `streamlit`
+- `pandas`
+- `scholarly`
+- `google-generativeai`
+- `python-dotenv`
+
+### 📊 **Exploratory Data Analysis Results**
+
+This project does not involve traditional exploratory data analysis, as it focuses on real-time data retrieval and summarization. However, if relevant visualizations or processing statistics are generated (e.g., citation counts, summary lengths), they can be displayed here.
+
+### 📈 **Performance of the Models based on the Accuracy Scores**
+
+The performance of the system can be evaluated based on:
+- **Response accuracy**: How well the system retrieves and summarizes relevant information from author profiles.
+- **Summary quality**: The clarity and conciseness of the generated summaries.
+
+### 💻 How to run
+
+To get started with **Research Profile Summarizer**, follow these steps:
+
+1. Navigate to the project directory:
+
+   ```bash
+   cd Research-Profile-Summarizer
+   ```
+
+2. (Optional) Create and activate a virtual environment:
+
+   ```bash
+   conda create -n venv python=3.10
+   conda activate venv
+   ```
+
+3. Install dependencies:
+
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+4. Configure environment variables:
+
+   ```
+   Rename `.env-sample` to `.env`.
+   Set `GOOGLE_API_KEY` in it to your Google API key.
+   ```
+
+   Refer to this site to get [your own key](https://ai.google.dev/tutorials/setup).
+
+
+5. Run the application:
+
+   ```bash
+   streamlit run app.py
+   ```
+
+   PS: Explore other functionalities within the app as well.
+
+### 📢 **Conclusion**
+
+**Research Profile Summarizer** successfully integrates data retrieval and AI-powered summarization to assist researchers in navigating academic profiles. It ensures high interaction accuracy by leveraging state-of-the-art models like Google Generative AI, providing a reliable and accessible research tool for its users.
+
+### ✒️ **Signature**
+
+**[J B Mugundh]**
+GitHub: [Github](https://github.com/J-B-Mugundh)
+LinkedIn: [Linkedin](https://www.linkedin.com/in/mugundhjb/)
diff --git a/Algorithms and Deep Learning Models/Research-Profile-Summarizer/app.py b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/app.py
new file mode 100644
index 000000000..504cb80d2
--- /dev/null
+++ b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/app.py
@@ -0,0 +1,164 @@
+"""Streamlit app that summarizes a Google Scholar author profile with Gemini."""
+
+import os
+
+import google.generativeai as genai
+import pandas as pd
+import streamlit as st
+from dotenv import load_dotenv
+from scholarly import scholarly
+
+# Number of most-cited publications listed on the page and sent to the model.
+TOP_PUBLICATION_COUNT = 5
+
+
+def fetch_author(name):
+    """Return the filled profile of the first author matching *name*.
+
+    Returns None when the search yields no results, instead of letting
+    ``next()`` raise StopIteration and crash the page.
+    """
+    first_result = next(scholarly.search_author(name), None)
+    if first_result is None:
+        return None
+    return scholarly.fill(first_result)
+
+
+def show(line, summary_lines):
+    """Write *line* to the page and record it for the summary prompt."""
+    st.write(line)
+    summary_lines.append(line)
+
+
+def render_profile(author):
+    """Display the author's profile sections and return them as prompt text.
+
+    Args:
+        author: Author dict returned by ``scholarly.fill``.
+
+    Returns:
+        The displayed information as one newline-separated string.
+    """
+    summary_lines = []
+
+    st.subheader("Author Information")
+    show(f"**Name:** {author.get('name', 'N/A')}", summary_lines)
+    show(f"**Affiliation:** {author.get('affiliation', 'N/A')}", summary_lines)
+
+    st.subheader("Research Interests")
+    interests = author.get("interests", [])
+    if interests:
+        interests_list = "- " + "\n- ".join(interests)
+        st.write(interests_list)
+        summary_lines.append(f"**Research Interests:**\n{interests_list}")
+    else:
+        st.write("N/A")
+        summary_lines.append("**Research Interests:** N/A")
+
+    st.subheader("Citations Overview")
+    citations = {
+        "Total Citations": author.get("citedby", "N/A"),
+        "Citations (Last 5 Years)": author.get("citedby5y", "N/A"),
+    }
+    for citation_name, citation_value in citations.items():
+        show(f"**{citation_name}:** {citation_value}", summary_lines)
+
+    citations_per_year = author.get("cites_per_year", {})
+    if citations_per_year:
+        citations_df = pd.DataFrame(
+            list(citations_per_year.items()), columns=["Year", "Citations"]
+        )
+        st.subheader("Citations Per Year")
+        st.line_chart(citations_df.set_index("Year"))
+        summary_lines.append("Citations data is available.")
+    else:
+        show("No citation data available for the past years.", summary_lines)
+
+    st.subheader("Indexes")
+    indexes = {
+        "H-Index": author.get("hindex", "N/A"),
+        "H-Index (Last 5 Years)": author.get("hindex5y", "N/A"),
+        "i10-Index": author.get("i10index", "N/A"),
+        "i10-Index (Last 5 Years)": author.get("i10index5y", "N/A"),
+    }
+    for index_name, index_value in indexes.items():
+        show(f"**{index_name}:** {index_value}", summary_lines)
+
+    st.subheader("Top Publications")
+    top_publications = sorted(
+        author.get("publications", []),
+        key=lambda pub: pub.get("num_citations", 0),
+        reverse=True,
+    )[:TOP_PUBLICATION_COUNT]
+    # The author-level entries already carry title and citation count, so
+    # no per-publication scholarly.fill() round trip is needed here.
+    publication_lines = [
+        f"- **{pub.get('bib', {}).get('title', 'Untitled')}** "
+        f"(Citations: {pub.get('num_citations', 0)})"
+        for pub in top_publications
+    ]
+    for publication_line in publication_lines:
+        st.write(publication_line)
+    summary_lines.append("**Top Publications:**\n" + "\n".join(publication_lines))
+
+    return "\n".join(summary_lines) + "\n"
+
+
+def generate_summary(data):
+    """Return a roughly 200-word Gemini summary of the profile text *data*."""
+    # Co-authors are not part of the collected data, so the prompt does not
+    # ask for them; requesting them invites fabricated names.
+    summary_prompt = (
+        "Write a concise 200-word summary based on the following information:\n"
+        f"{data}\n"
+        "Include key details like research interests, citations, H-index, "
+        "and notable publications."
+    )
+    chat = genai.GenerativeModel("gemini-pro").start_chat(history=[])
+    response = chat.send_message(summary_prompt)
+    return "".join(chunk.text for chunk in response)
+
+
+def main():
+    """Render the page and, on request, display and summarize a profile."""
+    st.title("🤖 Research Profile Summarizer")
+
+    # The README stores the key in .env, so load that file before reading it.
+    load_dotenv()
+    api_key = os.getenv("GOOGLE_API_KEY")
+    if not api_key:
+        st.error("GOOGLE_API_KEY is not set. Add it to your .env file.")
+        st.stop()
+    genai.configure(api_key=api_key)
+
+    author_name = st.text_input("Enter the author's name:", "Steven A Cholewiak")
+    if not st.button("Generate Summary"):
+        return
+
+    query = author_name.strip()
+    if not query:
+        st.warning("Please enter an author's name.")
+        return
+
+    try:
+        author = fetch_author(query)
+    except Exception as err:  # UI boundary: report lookup failures on the page
+        st.error(f"Could not retrieve the author profile: {err}")
+        return
+    if author is None:
+        st.error(f"No author profile found for '{query}'.")
+        return
+
+    summary_text = render_profile(author)
+
+    try:
+        generated_summary = generate_summary(summary_text)
+    except Exception as err:  # UI boundary: report API failures on the page
+        st.error(f"Could not generate the summary: {err}")
+        return
+    st.subheader("Profile Summary")
+    st.write(generated_summary)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Algorithms and Deep Learning Models/Research-Profile-Summarizer/requirements.txt b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/requirements.txt
new file mode 100644
index 000000000..fc0a8d507
--- /dev/null
+++ b/Algorithms and Deep Learning Models/Research-Profile-Summarizer/requirements.txt
@@ -0,0 +1,5 @@
+streamlit
+pandas
+scholarly
+google-generativeai
+python-dotenv
\ No newline at end of file