diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..722d5e7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.vscode
+__pycache__/
+*.pyc
+# Credentials must never be committed; rotate any key that was already pushed.
+.streamlit/secrets.toml
+.streamlit/*.json
diff --git a/.streamlit/credentials.json b/.streamlit/credentials.json
new file mode 100644
index 0000000..c544db1
--- /dev/null
+++ b/.streamlit/credentials.json
@@ -0,0 +1,13 @@
+{
+ "type": "service_account",
+ "project_id": "sebi-hackathon-sheets-db",
+ "private_key_id": "56fc605149245f890e4d36df6a862918fd8bf870",
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCl/CDNECKsr0MH\nRTqkf9/VdDRY458JUquQkKaNV4sqN8N2LnbQTqaxPc9vM4fZ9RL8y3m3Qoztec/W\nWYPAORlT/SAv41l6AvahimTtiOjDpM5rK0lNMR3b61Aephlr6hJc3lNmaU8lRqLm\nU4XCMXy9HGiMyy4HtQvzHp7uYWfKLBjKiLGbd6v51U+HA9QylYm4K7lmqs/xWU4U\nm15f5WdRk242IuDzNSUGvENZph4tB4D2MZklSg1N+ffW6dmKhrPv6HR2PCtPVa1J\niDeyR9nP5bdJaURBWldSmJgX5LTkifOAVzm00/0GEQKLhjU1Be4pQp1fq3zMFCgq\nPgTfw+AxAgMBAAECggEAOcboMgk+UKbXKaY28gDTDM2c1R4ygz/EGZfIaeXBuunb\nP32P/RWLn6GY3aftz3gn6lf3n8tqG9Wte3sASf52Kw6u0AyO3lI+wF9Yn3hrMYLi\nGVrhcp24u48O/gQqUC6TwsDe/gSFOL/8Pzluk3i/Z2KGoA+vj549MR0FDIxaYPxl\n/Mriz6Vw4hhr+7e3arMknFOw69dltY0LVliddV/4PskulFYWWs7XhOLTrtykXEZf\nQ331/luvWE5OuoYpjzqu5SZh7LhwXEfXFZG7cYWSg8AJfk77U/l97Zo26mjEvsKP\nLObHILcbFR0YEz625Z3sasHcKI+jRKUwfUVPpsPwAwKBgQDTb92dp6azql0jWbV+\nJFCyyeHvfiDurnmAQADxHkq+U+HbTW+imuXZx9gT7HhhTODqSIhfrXXkDvouUULb\nMTNBt9Kacva5pl/9t+OLAwAJEV5TN+0jvC89z6/jiDs7msyDmjSNRhQdemh4VowF\nrYOx8wz9NpjCm+4CjtW4fdn4RwKBgQDI9+IWLIpOIp+LNU4TmElep5Y2eXRyDAem\nERzCpNoBqelgWTKxvyl6XpsxB3LFxReSvyGLydp1vvpSXTFjDM58zDBpvfAsX5gb\nliUy87vqvefNH7+KiizkARNPItdL98+bKl7vV+eYcOXQphL6mPGpsNV4uM5GiCVq\nfNXl6g6XxwKBgFuTivf6ogAVHKvv8eTFk9IsEFhrV5YpKWS70+ybAyJ2ME+MtXpG\n/qCZpuPSOr1hz9SJP8+40GPLfN5Mwdkv8RRYqrXMb0IvZkn8uGgAD6bdx+3hPiuJ\nGSj7aCHzYjxJqqShSHuby6CM4OymB5V59p0/OPP/DqDTan3fIX/7ukmHAoGAZlKP\nsmqLIK1QPjitwBxRfvgk6yd7unWVj2A3HIOC3EX+pSfFP+rs23k/2v7H2Ro4nmbi\nVaI4sKrEGRuKAbyLhc8Q90XV+0HQ2HriBcdDbomydCYuCAPCR3SkuC4tIGjUM1Sn\ncYV4TY1t9YYZM66bnKKEjirKG+9F3aoh3iYFuBECgYBmImItCOpF3wtkIys3s8oQ\nIJ1favSM4kAa0ujC8RgmGIlJmRoukG3BhKy6d4eGHUWFKR/3WtbMkU68cHIm8j0F\naETVUTwRwIdxjJE9d8NAkXT462iXBSyGnKpmvdoDUzbty4PMMc9stqcxXzk8Vuop\nHeDDLgogXN0T5a41yX9jAg==\n-----END PRIVATE KEY-----\n",
+ "client_email": "sebi-hackathon-service-acc@sebi-hackathon-sheets-db.iam.gserviceaccount.com",
+ "client_id": "111806018619958126515",
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+ "token_uri": "https://oauth2.googleapis.com/token",
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/sebi-hackathon-service-acc%40sebi-hackathon-sheets-db.iam.gserviceaccount.com",
+ "universe_domain": "googleapis.com"
+ }
\ No newline at end of file
diff --git a/.streamlit/sebi-hackathon-sheets-db-56fc60514924.json b/.streamlit/sebi-hackathon-sheets-db-56fc60514924.json
new file mode 100644
index 0000000..a9bb2e3
--- /dev/null
+++ b/.streamlit/sebi-hackathon-sheets-db-56fc60514924.json
@@ -0,0 +1,13 @@
+{
+ "type": "service_account",
+ "project_id": "sebi-hackathon-sheets-db",
+ "private_key_id": "56fc605149245f890e4d36df6a862918fd8bf870",
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCl/CDNECKsr0MH\nRTqkf9/VdDRY458JUquQkKaNV4sqN8N2LnbQTqaxPc9vM4fZ9RL8y3m3Qoztec/W\nWYPAORlT/SAv41l6AvahimTtiOjDpM5rK0lNMR3b61Aephlr6hJc3lNmaU8lRqLm\nU4XCMXy9HGiMyy4HtQvzHp7uYWfKLBjKiLGbd6v51U+HA9QylYm4K7lmqs/xWU4U\nm15f5WdRk242IuDzNSUGvENZph4tB4D2MZklSg1N+ffW6dmKhrPv6HR2PCtPVa1J\niDeyR9nP5bdJaURBWldSmJgX5LTkifOAVzm00/0GEQKLhjU1Be4pQp1fq3zMFCgq\nPgTfw+AxAgMBAAECggEAOcboMgk+UKbXKaY28gDTDM2c1R4ygz/EGZfIaeXBuunb\nP32P/RWLn6GY3aftz3gn6lf3n8tqG9Wte3sASf52Kw6u0AyO3lI+wF9Yn3hrMYLi\nGVrhcp24u48O/gQqUC6TwsDe/gSFOL/8Pzluk3i/Z2KGoA+vj549MR0FDIxaYPxl\n/Mriz6Vw4hhr+7e3arMknFOw69dltY0LVliddV/4PskulFYWWs7XhOLTrtykXEZf\nQ331/luvWE5OuoYpjzqu5SZh7LhwXEfXFZG7cYWSg8AJfk77U/l97Zo26mjEvsKP\nLObHILcbFR0YEz625Z3sasHcKI+jRKUwfUVPpsPwAwKBgQDTb92dp6azql0jWbV+\nJFCyyeHvfiDurnmAQADxHkq+U+HbTW+imuXZx9gT7HhhTODqSIhfrXXkDvouUULb\nMTNBt9Kacva5pl/9t+OLAwAJEV5TN+0jvC89z6/jiDs7msyDmjSNRhQdemh4VowF\nrYOx8wz9NpjCm+4CjtW4fdn4RwKBgQDI9+IWLIpOIp+LNU4TmElep5Y2eXRyDAem\nERzCpNoBqelgWTKxvyl6XpsxB3LFxReSvyGLydp1vvpSXTFjDM58zDBpvfAsX5gb\nliUy87vqvefNH7+KiizkARNPItdL98+bKl7vV+eYcOXQphL6mPGpsNV4uM5GiCVq\nfNXl6g6XxwKBgFuTivf6ogAVHKvv8eTFk9IsEFhrV5YpKWS70+ybAyJ2ME+MtXpG\n/qCZpuPSOr1hz9SJP8+40GPLfN5Mwdkv8RRYqrXMb0IvZkn8uGgAD6bdx+3hPiuJ\nGSj7aCHzYjxJqqShSHuby6CM4OymB5V59p0/OPP/DqDTan3fIX/7ukmHAoGAZlKP\nsmqLIK1QPjitwBxRfvgk6yd7unWVj2A3HIOC3EX+pSfFP+rs23k/2v7H2Ro4nmbi\nVaI4sKrEGRuKAbyLhc8Q90XV+0HQ2HriBcdDbomydCYuCAPCR3SkuC4tIGjUM1Sn\ncYV4TY1t9YYZM66bnKKEjirKG+9F3aoh3iYFuBECgYBmImItCOpF3wtkIys3s8oQ\nIJ1favSM4kAa0ujC8RgmGIlJmRoukG3BhKy6d4eGHUWFKR/3WtbMkU68cHIm8j0F\naETVUTwRwIdxjJE9d8NAkXT462iXBSyGnKpmvdoDUzbty4PMMc9stqcxXzk8Vuop\nHeDDLgogXN0T5a41yX9jAg==\n-----END PRIVATE KEY-----\n",
+ "client_email": "sebi-hackathon-service-acc@sebi-hackathon-sheets-db.iam.gserviceaccount.com",
+ "client_id": "111806018619958126515",
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+ "token_uri": "https://oauth2.googleapis.com/token",
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/sebi-hackathon-service-acc%40sebi-hackathon-sheets-db.iam.gserviceaccount.com",
+ "universe_domain": "googleapis.com"
+}
diff --git a/.streamlit/secrets.toml b/.streamlit/secrets.toml
new file mode 100644
index 0000000..823ccc8
--- /dev/null
+++ b/.streamlit/secrets.toml
@@ -0,0 +1,17 @@
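+# .streamlit/secrets.toml -- holds credentials; do NOT commit real values (rotate the service-account key below if this file was ever pushed).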
+
+public_gsheets_url = "https://docs.google.com/spreadsheets/d/1w-zpQ66_hxtQfDlAidxmExSMBpUGGIAos9dvlRktk74/edit?usp=sharing"
+
+openai_apikey = "---- ENTER YOUR API KEY HERE -----"
+
+[gcp_service_account]
+type = "service_account"
+project_id = "sebi-hackathon-sheets-db"
+private_key_id = "56fc605149245f890e4d36df6a862918fd8bf870"
+private_key = "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCl/CDNECKsr0MH\nRTqkf9/VdDRY458JUquQkKaNV4sqN8N2LnbQTqaxPc9vM4fZ9RL8y3m3Qoztec/W\nWYPAORlT/SAv41l6AvahimTtiOjDpM5rK0lNMR3b61Aephlr6hJc3lNmaU8lRqLm\nU4XCMXy9HGiMyy4HtQvzHp7uYWfKLBjKiLGbd6v51U+HA9QylYm4K7lmqs/xWU4U\nm15f5WdRk242IuDzNSUGvENZph4tB4D2MZklSg1N+ffW6dmKhrPv6HR2PCtPVa1J\niDeyR9nP5bdJaURBWldSmJgX5LTkifOAVzm00/0GEQKLhjU1Be4pQp1fq3zMFCgq\nPgTfw+AxAgMBAAECggEAOcboMgk+UKbXKaY28gDTDM2c1R4ygz/EGZfIaeXBuunb\nP32P/RWLn6GY3aftz3gn6lf3n8tqG9Wte3sASf52Kw6u0AyO3lI+wF9Yn3hrMYLi\nGVrhcp24u48O/gQqUC6TwsDe/gSFOL/8Pzluk3i/Z2KGoA+vj549MR0FDIxaYPxl\n/Mriz6Vw4hhr+7e3arMknFOw69dltY0LVliddV/4PskulFYWWs7XhOLTrtykXEZf\nQ331/luvWE5OuoYpjzqu5SZh7LhwXEfXFZG7cYWSg8AJfk77U/l97Zo26mjEvsKP\nLObHILcbFR0YEz625Z3sasHcKI+jRKUwfUVPpsPwAwKBgQDTb92dp6azql0jWbV+\nJFCyyeHvfiDurnmAQADxHkq+U+HbTW+imuXZx9gT7HhhTODqSIhfrXXkDvouUULb\nMTNBt9Kacva5pl/9t+OLAwAJEV5TN+0jvC89z6/jiDs7msyDmjSNRhQdemh4VowF\nrYOx8wz9NpjCm+4CjtW4fdn4RwKBgQDI9+IWLIpOIp+LNU4TmElep5Y2eXRyDAem\nERzCpNoBqelgWTKxvyl6XpsxB3LFxReSvyGLydp1vvpSXTFjDM58zDBpvfAsX5gb\nliUy87vqvefNH7+KiizkARNPItdL98+bKl7vV+eYcOXQphL6mPGpsNV4uM5GiCVq\nfNXl6g6XxwKBgFuTivf6ogAVHKvv8eTFk9IsEFhrV5YpKWS70+ybAyJ2ME+MtXpG\n/qCZpuPSOr1hz9SJP8+40GPLfN5Mwdkv8RRYqrXMb0IvZkn8uGgAD6bdx+3hPiuJ\nGSj7aCHzYjxJqqShSHuby6CM4OymB5V59p0/OPP/DqDTan3fIX/7ukmHAoGAZlKP\nsmqLIK1QPjitwBxRfvgk6yd7unWVj2A3HIOC3EX+pSfFP+rs23k/2v7H2Ro4nmbi\nVaI4sKrEGRuKAbyLhc8Q90XV+0HQ2HriBcdDbomydCYuCAPCR3SkuC4tIGjUM1Sn\ncYV4TY1t9YYZM66bnKKEjirKG+9F3aoh3iYFuBECgYBmImItCOpF3wtkIys3s8oQ\nIJ1favSM4kAa0ujC8RgmGIlJmRoukG3BhKy6d4eGHUWFKR/3WtbMkU68cHIm8j0F\naETVUTwRwIdxjJE9d8NAkXT462iXBSyGnKpmvdoDUzbty4PMMc9stqcxXzk8Vuop\nHeDDLgogXN0T5a41yX9jAg==\n-----END PRIVATE KEY-----\n"
+client_email = "sebi-hackathon-service-acc@sebi-hackathon-sheets-db.iam.gserviceaccount.com"
+client_id = "111806018619958126515"
+auth_uri = "https://accounts.google.com/o/oauth2/auth"
+token_uri = "https://oauth2.googleapis.com/token"
+auth_provider_x509_cert_url = "https://www.googleapis.com/oauth2/v1/certs"
+client_x509_cert_url = "https://www.googleapis.com/robot/v1/metadata/x509/sebi-hackathon-service-acc%40sebi-hackathon-sheets-db.iam.gserviceaccount.com"
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..d99f2f3
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+ "[python]": {
+ "editor.defaultFormatter": "ms-python.black-formatter"
+ },
+ "python.formatting.provider": "none"
+}
\ No newline at end of file
diff --git a/01_homepage.py b/01_homepage.py
new file mode 100644
index 0000000..f89a62f
--- /dev/null
+++ b/01_homepage.py
@@ -0,0 +1,20 @@
+# Import necessary libraries
+import streamlit as st
+from streamlit_player import st_player
+
+# Landing page: configures the page, then renders the logo, the welcome and
+# "about" headings, a sidebar hint and an embedded YouTube player.
+st.set_page_config(
+ page_title="Smart With Sebi",
+ page_icon=":detective:",
+ layout="wide",
+)
+
+# The middle of three columns is used to place the logo.
+with st.columns(3)[1]:
+ st.image("media/smart_with_sebi.jpeg", width=200)
+
+# NOTE(review): the two markdown strings below span several lines and look like they lost their HTML tags; confirm against the original file.
+st.markdown("
Welcome to Smart with SEBI portal", unsafe_allow_html=True)
+
+st.markdown("
About this project:
", unsafe_allow_html=True)
+
+st.sidebar.success("Select a page above!")
+
+st_player("https://www.youtube.com/watch?v=jNVTSDwTm14")
\ No newline at end of file
diff --git a/README.md b/README.md
index d23b4aa..4ae5a5f 100644
--- a/README.md
+++ b/README.md
@@ -9,18 +9,69 @@
## README.md must consist of the following information:
-#### Team Name -
-#### Problem Statement -
-#### Team Leader Email -
+#### Team Name - Emperor
+#### Problem Statement - Misleading Claim Detection & Content Curation
+#### Team Leader Email - hindeshnit19@gmail.com
+#### Project Demo Link:
+#### Project hosted at:
## A Brief of the Prototype:
- This section must include UML Diagrams and prototype description
+ 
+
+ Working:
+
+ 1. There are two separate portals: one for common users, which offers claim detection and educational content suggestions, and one for SEBI personnel, which offers claim detection plus features such as source reliability rating.
+
+ 2. First, the user provides TEXT/AUDIO/VIDEO/IMAGE input (currently only upload is supported; web scraping is reserved for a future use case), and the input is converted to text.
+
+ 3. The text is passed to an OpenAI language model (through LangChain) with a precise prompt that asks whether the claim is misleading or not (audio and video are first transcribed with OpenAI's whisper-1 model). Financial points are then extracted from the text; suppose the input is:
+ "Invest into company A, which is giving good returns."
+ Certain financial keywords: invest, returns are extracted and then based on these key words, videos from YouTube are suggested to the user.
+
+ 4. **MOST IMPORTANT FEATURES for SEBI Personnel:**
+
+ 4.1. Based on the input claim, relevant rules and regulations are also suggested by mapping the extracted financial concepts to the SEBI rules and regulations database.
+
+ 4.2. A source reliability rating database in the form of a Google Sheet is connected to the app, so that whenever a claim is judged it is pushed to the database (Google Sheet) and a score is recorded for the claim type: -1 for MISLEADING and +1 for NOT MISLEADING. Based on the cumulative scores, a ranking is assigned and displayed.
+
+
## Tech Stack:
- List Down all technologies used to Build the prototype
+ Following technologies were used:\
+ 1. Streamlit - for web-development\
+ 2. HuggingFace - for sentiment analysis pipeline\
+ 3. LangChain - For accessing OpenAI \
+ 4. PyTesseract - for Optical Character Recognition\
+ 5. pydub - for audio segmentation\
+ 6. moviePy - for detaching audio from video\
+ 7. GoogleSheets - for backend database
## Step-by-Step Code Execution Instructions:
- This Section must contain a set of instructions required to clone and run the prototype so that it can be tested and deeply analyzed
+ 1. clone the repository:
+ ```
+ git clone https://github.com/hindesh-akash/Empowering-Investors-Hackathon.git
+ ```
+
+ 2. **IMPORTANT**:
+
+ -> Go to '.streamlit/secrets.toml' \
+ -> Enter your OpenAI api key in 'openai_apikey' variable \
+ -> save and exit
+ 3. Open a new python terminal
+ 4. Run the command:
+ ```
+ pip install -r requirements.txt
+ ```
+ ```
+ streamlit run 01_homepage.py
+ ```
+ 5. Explore the app
+
## What I Learned:
- Write about the biggest learning you had while developing the prototype
+ I learned about the problems common people face in the real world: money is a necessity for every human being, and people invest their money to get positive returns so that they can improve their standard of living.
+ And we as contributors can help them achieve the goal.\
+ I faced many challenges while developing the prototype, especially connecting with the database and APIs, but in the end when I see the application of this project in real world, I feel great motivation to develop a fully functional solution.
+
+ Thank you for this opportunity!
+
diff --git a/__pycache__/apikey.cpython-310.pyc b/__pycache__/apikey.cpython-310.pyc
new file mode 100644
index 0000000..430a96e
Binary files /dev/null and b/__pycache__/apikey.cpython-310.pyc differ
diff --git a/__pycache__/prompts.cpython-310.pyc b/__pycache__/prompts.cpython-310.pyc
new file mode 100644
index 0000000..4bdb5d8
Binary files /dev/null and b/__pycache__/prompts.cpython-310.pyc differ
diff --git a/__pycache__/source_reliability_score.cpython-310.pyc b/__pycache__/source_reliability_score.cpython-310.pyc
new file mode 100644
index 0000000..0884aab
Binary files /dev/null and b/__pycache__/source_reliability_score.cpython-310.pyc differ
diff --git a/__pycache__/utils.cpython-310.pyc b/__pycache__/utils.cpython-310.pyc
new file mode 100644
index 0000000..71edc09
Binary files /dev/null and b/__pycache__/utils.cpython-310.pyc differ
diff --git a/apikey.py b/apikey.py
new file mode 100644
index 0000000..10e96a6
--- /dev/null
+++ b/apikey.py
@@ -0,0 +1,25 @@
+import streamlit as st
+
+apikey = st.secrets['openai_apikey'] #Bobby key
+
+''
+
+'''NOTE:
+The OpenAI API key is read from Streamlit secrets ('openai_apikey' in .streamlit/secrets.toml).
+It is a secret key and should not be shared with anyone or committed to version control.
+Please do not misuse this key.
+
+
+IF THERE IS API KEY PROBLEM, you can enter your own API KEY here.
+Follow the steps:
+
+1. Go to www.openai.com
+2. Sign up for an account
+3. Go to your dashboard
+4. Click on the "Billing" tab
+5. Click on the "API Keys" tab
+6. Click on "Create new API key"
+7. Copy the key and paste it into the 'openai_apikey' entry of .streamlit/secrets.toml
+
+'''
+
diff --git a/media/demo_video.mp4 b/media/demo_video.mp4
new file mode 100644
index 0000000..039da6a
Binary files /dev/null and b/media/demo_video.mp4 differ
diff --git a/media/influencer.png b/media/influencer.png
new file mode 100644
index 0000000..f3ba2d7
Binary files /dev/null and b/media/influencer.png differ
diff --git a/media/smart_with_sebi.jpeg b/media/smart_with_sebi.jpeg
new file mode 100644
index 0000000..ce5be81
Binary files /dev/null and b/media/smart_with_sebi.jpeg differ
diff --git a/media/text_influencer.jpg b/media/text_influencer.jpg
new file mode 100644
index 0000000..3458050
Binary files /dev/null and b/media/text_influencer.jpg differ
diff --git a/media/wirecard_audio.ogg b/media/wirecard_audio.ogg
new file mode 100644
index 0000000..819bd80
Binary files /dev/null and b/media/wirecard_audio.ogg differ
diff --git a/new_app.py b/new_app.py
new file mode 100644
index 0000000..d242b23
--- /dev/null
+++ b/new_app.py
@@ -0,0 +1,26 @@
+import streamlit as st
+
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+# Manual smoke test for the Google Sheets connection: it authenticates, opens
+# the sheet and appends one placeholder row. Every run writes that row.
+
+# Authenticate with Google Sheets API
+scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
+# The key file is stored under .streamlit/ (the same path that
+# source_reliability_score.py loads), not in the working directory.
+credentials = ServiceAccountCredentials.from_json_keyfile_name(".streamlit/credentials.json", scope)
+client = gspread.authorize(credentials)
+
+st.success("connection done")
+
+# Open the Google Sheet by its title
+sheet = client.open("sebi-hackathon-db")
+st.success("Sheet loaded")
+
+worksheet = sheet.get_worksheet(0) # Assuming you want to work with the first worksheet
+st.success("Worksheet loaded")
+
+# Define the data for the new row
+new_row_data = ["Value1", "Value2", "Value3","Val4","val5"]
+
+# Append the new row to the worksheet
+worksheet.append_row(new_row_data)
+
+st.success("Row appended")
diff --git a/pages/02_user.py b/pages/02_user.py
new file mode 100644
index 0000000..cab1c36
--- /dev/null
+++ b/pages/02_user.py
@@ -0,0 +1,100 @@
+# --- IMPORT DEPENDENCIES -------------------------------+
+# User portal page: the visitor picks an input mode (text, audio, video, image
+# or QnA); the input is turned into text, run through the claim-detection
+# pipeline, and related videos are shown for the extracted financial points.
+import sys
+import streamlit as st
+
+# setting path
+# NOTE(review): these entries name modules ('prompts', 'utils'), not directories; presumably the
+# imports below resolve because the app is launched from the repository root - confirm.
+
+sys.path.append("../prompts")
+sys.path.append("../utils")
+
+from prompts import *
+from utils import *
+from streamlit_option_menu import option_menu
+
+
+# --- TITLE --------------------------------------------+
+with st.columns(3)[1]:
+ st.image("media/smart_with_sebi.jpeg", width=200)
+
+# NOTE(review): the string below spans two lines and looks like it lost its HTML tags; confirm against the original file.
+st.markdown(" USER PORTAL
", unsafe_allow_html=True)
+
+
+# --- OPTION MENU --------------------------------------+
+# Horizontal menu; the chosen label is compared against in the branches below.
+selected = option_menu(
+ menu_title=None,
+ options=["TEXT", "AUDIO", "VIDEO", "IMAGE", "QnA"],
+ icons=["card-text", "volume-up", "camera-reels", "image", "question-circle"],
+ menu_icon="cast",
+ orientation="horizontal",
+ styles={
+ "container": {"padding": "0!important", "background-color": "#fafafa"},
+ "icon": {"color": "orange", "font-size": "25px"},
+ "nav-link": {"font-size": "20px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"},
+ "nav-link-selected": {"background-color": "indigo"},
+ },
+)
+
+# Main content based on user's selected
+if selected == "TEXT":
+ st.write("**TEXT BASED CLAIM DETECTION:**\n\n")
+ prompt = st.text_area("User Input")
+
+ # Nothing runs until Submit is pressed with a non-empty prompt.
+ if st.button("Submit") and prompt:
+ claim_detection_pipeline(prompt)
+ financial_points = financial_point_extractor(prompt)
+ display_video_links(financial_points)
+ # display_articles(financial_points)
+
+
+if selected == "AUDIO":
+ st.write("**AUDIO BASED CLAIM DETECTION:**")
+ audio_file = st.file_uploader("Upload an audio file", type=["ogg", "mp3", "wav"])
+
+ if audio_file:
+ st.audio(audio_file, format="audio/ogg", start_time=0)
+ # The transcript pre-fills the text area so the user can edit it before submitting.
+ transcript = transcribe_audio(audio_file)
+ prompt = st.text_area("Transcribed User Input", value=transcript)
+ if st.button("Submit") and prompt:
+ claim_detection_pipeline(prompt)
+ financial_points = financial_point_extractor(prompt)
+ display_video_links(financial_points)
+
+
+if selected == "IMAGE":
+ st.write("**IMAGE BASED CLAIM DETECTION:**")
+ image_file = st.file_uploader("Upload an image file", type=["png", "jpg", "jpeg"])
+
+ if image_file:
+ st.image(
+ image_file, caption="Uploaded Image.", use_column_width=True, width=300
+ )
+ # OCR result pre-fills the text area so the user can correct it.
+ text_from_image = get_text_from_image(image_file)
+ prompt = st.text_area("User input image to text:", value=text_from_image)
+
+ if st.button("Submit") and prompt:
+ claim_detection_pipeline(prompt)
+ financial_points = financial_point_extractor(prompt)
+ display_video_links(financial_points)
+
+if selected == "QnA":
+ st.write("**Question - Answering**")
+ prompt = st.text_area("User Input")
+
+ if st.button("Submit") and prompt:
+ question_and_answer(prompt)
+
+if selected == "VIDEO":
+ st.write("**VIDEO BASED CLAIM DETECTION:**")
+ video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi"])
+
+ if video_file:
+ st.video(video_file, start_time=0)
+ #----------------------------------------
+ # video_to_text saves the upload, extracts its audio and transcribes it.
+ transcript = video_to_text(video_file)
+ #----------------------------------------
+ prompt = st.text_area("Transcribed User Input", value=transcript)
+
+ if st.button("Submit") and prompt:
+ claim_detection_pipeline(prompt)
+ financial_points = financial_point_extractor(prompt)
+ display_video_links(financial_points)
\ No newline at end of file
diff --git a/pages/03_sebi.py b/pages/03_sebi.py
new file mode 100644
index 0000000..3683e30
--- /dev/null
+++ b/pages/03_sebi.py
@@ -0,0 +1,138 @@
+
+#--- IMPORT DEPENDENCIES -------------------------------+
+# SEBI portal page: offers claim detection (the verdict is also logged against
+# a source via add_details_source, followed by suggested SEBI rules) and a
+# source reliability rating view.
+import sys
+import streamlit as st
+
+# setting path
+# NOTE(review): these entries name modules, not directories; presumably the imports below
+# resolve because the app is launched from the repository root - confirm.
+sys.path.append('../prompts')
+sys.path.append('../source_reliability_score')
+sys.path.append('../utils')
+
+
+from source_reliability_score import *
+from prompts import *
+from utils import *
+from streamlit_option_menu import option_menu
+
+
+
+# --- TITLE --------------------------------------------+
+with st.columns(3)[1]:
+ st.image("media/smart_with_sebi.jpeg", width=200)
+
+# NOTE(review): the string below spans two lines and looks like it lost its HTML tags; confirm against the original file.
+st.markdown(" SEBI PORTAL
", unsafe_allow_html=True)
+
+
+# --- OPTION MENU --------------------------------------+
+
+# Top-level menu: claim detection or the reliability ranking.
+choice = option_menu(
+ menu_title=None,
+ options=["CLAIM DETECTION", "SOURCE RELIABILITY RATING"],
+ icons=["database-fill-exclamation", "person-fill-exclamation"],
+ menu_icon="cast",
+ orientation="horizontal",
+ styles={
+ "container": {"padding": "0!important", "background-color": "#fafafa"},
+ "icon": {"color": "orange", "font-size": "25px"},
+ "nav-link": {"font-size": "20px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"},
+ "nav-link-selected": {"background-color": "indigo"},
+ },
+)
+
+if choice == "CLAIM DETECTION":
+ # Second-level menu selecting the input mode.
+ selected = option_menu(
+ menu_title=None,
+ options=["TEXT", "AUDIO", "VIDEO","IMAGE","QnA"],
+ icons=["card-text", "volume-up", "camera-reels", "image","question-circle"],
+ menu_icon="cast",
+ orientation="horizontal",
+ styles={
+ "container": {"padding": "0!important", "background-color": "#fafafa"},
+ "icon": {"color": "orange", "font-size": "15px"},
+ "nav-link": {"font-size": "15px", "text-align": "left", "margin":"0px", "--hover-color": "#eee"},
+ "nav-link-selected": {"background-color": "indigo"},
+ },
+ )
+
+
+ # Main content based on sebi's employee selected
+ if selected == "TEXT":
+
+ st.write("**TEXT BASED CLAIM DETECTION:**\n\n")
+ prompt = st.text_area("User Input")
+ source_name = st.text_area("Source Name")
+ source_type = st.text_area("Source Type")
+
+ if st.button("Submit") and prompt:
+ response = claim_detection_pipeline(prompt)
+ # Logs the verdict for this source (appends a row to the Google Sheet).
+ add_details_source(source_name,source_type,response)
+
+ financial_points = financial_point_extractor(prompt)
+ display_sebi_rules(financial_points)
+
+
+
+ if selected == "AUDIO":
+ st.write("**AUDIO BASED CLAIM DETECTION:**")
+ audio_file = st.file_uploader("Upload an audio file", type=["ogg", "mp3", "wav"])
+
+ if audio_file:
+ st.audio(audio_file, format='audio/ogg', start_time=0)
+ # The transcript pre-fills the text area so it can be edited before submitting.
+ transcript = transcribe_audio(audio_file)
+ prompt = st.text_area("Transcribed User Input", value=transcript)
+ source_name = st.text_area("Source Name")
+ source_type = st.text_area("Source Type")
+ if st.button("Submit") and prompt:
+ response = claim_detection_pipeline(prompt)
+ add_details_source(source_name,source_type,response)
+ financial_points = financial_point_extractor(prompt)
+ display_sebi_rules(financial_points)
+
+
+ if selected == "IMAGE":
+ st.write("**IMAGE BASED CLAIM DETECTION:**")
+ image_file = st.file_uploader("Upload an image file", type=["png", "jpg", "jpeg"])
+
+ if image_file:
+ st.image(image_file, caption='Uploaded Image.', use_column_width=True,width=300)
+ # OCR result pre-fills the text area so it can be corrected.
+ text_from_image = get_text_from_image(image_file)
+ prompt = st.text_area("User input image to text:", value=text_from_image)
+ source_name = st.text_area("Source Name")
+ source_type = st.text_area("Source Type")
+
+ if st.button("Submit") and prompt:
+ response = claim_detection_pipeline(prompt)
+ add_details_source(source_name,source_type,response)
+ financial_points = financial_point_extractor(prompt)
+ display_sebi_rules(financial_points)
+
+ if selected == "VIDEO":
+ st.write("**VIDEO BASED CLAIM DETECTION:**")
+ video_file = st.file_uploader("Upload a video file", type=["mp4", "mov", "avi"])
+
+ if video_file:
+ st.video(video_file, start_time=0)
+ #----------------------------------------
+ # video_to_text saves the upload, extracts its audio and transcribes it.
+ transcript = video_to_text(video_file)
+ #----------------------------------------
+ prompt = st.text_area("Transcribed User Input", value=transcript)
+
+ source_name = st.text_area("Source Name")
+ source_type = st.text_area("Source Type")
+ if st.button("Submit") and prompt:
+ response = claim_detection_pipeline(prompt)
+ add_details_source(source_name,source_type,response)
+ financial_points = financial_point_extractor(prompt)
+ display_sebi_rules(financial_points)
+
+ if selected == "QnA":
+ st.write("**Question - Answering**")
+ prompt = st.text_area("User Input")
+
+ if st.button("Submit") and prompt:
+ question_and_answer(prompt)
+
+if choice == "SOURCE RELIABILITY RATING":
+ if st.button("Click here to get the source reliability rating"):
+ display_source_scores()
+
\ No newline at end of file
diff --git a/prompts.py b/prompts.py
new file mode 100644
index 0000000..38298fc
--- /dev/null
+++ b/prompts.py
@@ -0,0 +1,22 @@
+# Instruction suffixes that are appended AFTER the user's text before it is
+# sent to the LLM (each one refers to "the above" sentence/statement/topic).
+
+# Asks for a single comma-separated line of financial terms.
+financial_point_prompt= """\nOnly extract the strictly financial terms and not the name of any company or person from the above sentence. Only output the financial terms, your output should look like this: financial_statement_1,financial_statement_2, ...
+ Do not output any other sentence apart from bulleted points and any other measurable quantitty like "increased/less/more/decreased" etc. Not even the sentence like \"Sure, here are the financial terms extracted from the given sentence:\" Print output in a single line separated by commas and no new line."""
+
+
+# Asks for a MISLEADING / NOT MISLEADING verdict followed by corrected information.
+claim_prompt = """Verify the above statement misleading or not. The output should be like: \"The given statement is **MISLEADING** OR The given statement is **NOT MISLEADING**\". Then after line change output the correct information while including the following keywords: About the company, current financial news."""
+
+
+# Asks for five SEBI rules about the topic, in a fixed bulleted layout.
+sebi_rules_prompt = """Please provide me with information about five SEBI rules related to the above topic along with their relevant article numbers or rule numbers from the rulebook. For each rule, include the following details:
+
+Rules related to (the given topic) :
+
+1. Rule name:
+ - Rule:
+ - Relevant Article/Rule:
+
+2. Rule name:
+ - Rule:
+ - Relevant Article/Rule:
+
+...
+
+Please only output the bulleted rules and not sentences like "Please note that my information is based on data available up until September 2021, and I recommend referring to the official SEBI documents for the most current and accurate information." and also do not include sentences like "Absolutely, I understand your request. Here are five SEBI rules related to the stocks of a company, along with their relevant details:, also no thank you and nothing, just strictly output the bulleted points."""
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c593b09
Binary files /dev/null and b/requirements.txt differ
diff --git a/source_reliability_score.py b/source_reliability_score.py
new file mode 100644
index 0000000..45b63f5
--- /dev/null
+++ b/source_reliability_score.py
@@ -0,0 +1,86 @@
+# streamlit_app.py
+#--------------------------- IMPORTS ---------------------------------+
+import pandas as pd
+import streamlit as st
+import datetime
+from streamlit_elements import elements, mui
+import gspread
+from oauth2client.service_account import ServiceAccountCredentials
+
+
+
+#---------------- Authenticate with Google Sheets API ---------------------------------+
+# NOTE: these statements run at import time, so importing this module
+# authenticates and opens the sheet. The key-file path is relative to the
+# current working directory.
+scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
+credentials = ServiceAccountCredentials.from_json_keyfile_name(".streamlit/credentials.json", scope)
+client = gspread.authorize(credentials)
+# st.success("connection done")
+
+# Open the Google Sheet by its title
+sheet = client.open("sebi-hackathon-db")
+# st.success("Sheet loaded")
+
+# Module-level handle used by add_details_source() to append rows.
+worksheet = sheet.get_worksheet(0) # Assuming we want to work with the first worksheet
+# st.success("Worksheet loaded")
+
+# -------------------Read in data from the Google Sheet -------------------------------+
+@st.cache_data(ttl=600)
+def load_data(sheets_url):
+    """Load the shared Google Sheet as a DataFrame via its CSV export.
+
+    The "/edit?usp=sharing" suffix of *sheets_url* is swapped for the CSV
+    export endpoint of the first tab (gid=0); malformed rows are skipped.
+    Results are cached by Streamlit with ttl=600.
+    """
+    export_url = sheets_url.replace("/edit?usp=sharing", "/export?format=csv&gid=0")
+    frame = pd.read_csv(export_url, on_bad_lines='skip')
+    return frame
+
+#----------------------------------------------------+
+# Public URL of the Google Sheet, shown to the user as a clickable link.
+url = st.secrets['public_gsheets_url']
+link_text = "Click here to see the actual Google Sheet Database!"
+# The anchor must carry the URL, otherwise the "click here" text is not a link.
+# It is rendered with st.markdown(..., unsafe_allow_html=True).
+link = f'<a href="{url}" target="_blank">{link_text}</a>'
+
+# Prompt suffix asking the LLM for a bare MISLEADING / NOT MISLEADING verdict.
+score_prompt = """Verify the above statement misleading or not. The output should contain only either MISLEADING or NOT MISLEADING. Do not output any other sentence apart from what I mentioned. Not even the sentence like \"Sure, here are the financial terms extracted from the given sentence:\" Print output in a single line."""
+
+#--------------------------- FUNCTIONS ---------------------------------+
+def score_response(response):
+    """Map an LLM verdict to a reliability score.
+
+    Returns +1 when the text contains "NOT MISLEADING", -1 when it contains
+    "MISLEADING", and 0 when neither appears (or the text is empty/None).
+    Matching is case-insensitive.
+    """
+    text = (response or "").upper()
+    # "NOT MISLEADING" contains "MISLEADING", so the negated form must be
+    # tested first; otherwise every verdict would score -1.
+    if "NOT MISLEADING" in text:
+        return 1
+    if "MISLEADING" in text:
+        return -1
+    return 0
+
+def add_details_source(source_name,source_type,response):
+    """Append one scored claim to the Google Sheet.
+
+    The row holds: current local timestamp, source name, source type, the
+    raw model response and the score derived from it by score_response().
+    """
+    stamped_at = str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
+    verdict_score = score_response(response)
+    worksheet.append_row(
+        [stamped_at, source_name, source_type, response, verdict_score]
+    )
+
+
+
+
+def update_scores():
+    """Build the source ranking from the sheet data.
+
+    Sums the 'score' column per 'source_name', orders sources from highest
+    to lowest total and adds a 1-based 'rank' column as the first column.
+    """
+    sheet_df = load_data(st.secrets["public_gsheets_url"])
+    totals = sheet_df.groupby('source_name')['score'].sum().reset_index()
+    ranked = totals.sort_values(by='score', ascending=False).reset_index(drop=True)
+    # After the reset the index runs 0..n-1 in ranking order.
+    ranked.insert(0, 'rank', ranked.index + 1)
+    return ranked
+
+#--------------------------- DISPLAY SCORES ---------------------------------+
+def display_source_scores(top_n=4):
+    """Render a card for each of the top-ranked sources.
+
+    Args:
+        top_n: maximum number of sources to show (default 4, the previous
+            fixed value).
+
+    Shows the link to the sheet, then one card per source with its name,
+    rank and cumulative score.
+    """
+    ranking_df = update_scores()
+    st.markdown(link, unsafe_allow_html=True)
+    # Cap at the number of ranked sources: indexing a fixed four rows raised
+    # IndexError when the sheet held fewer sources.
+    for i in range(min(top_n, len(ranking_df))):
+        row = ranking_df.iloc[i]
+        with elements(f"new_elem_{i}"):
+            with mui.Card(sx={ "maxWidth": 345,"color":"#1e0096" }):
+                with mui.CardActionArea():
+                    with mui.CardContent():
+                        mui.Typography(f"{row['source_name']}", gutterBottom=True, variant="h4", component="div")
+                        mui.Typography(f"Rank: {row['rank']}", variant="h6", color="#006996")
+                        mui.Typography(f"Score: {row['score']}", variant="h6", color="#151f1b")
+
+                with mui.CardActions():
+                    mui.Button("Share", size="small", color="primary")
+
+
diff --git a/use_case.png b/use_case.png
new file mode 100644
index 0000000..d3de5c6
Binary files /dev/null and b/use_case.png differ
diff --git a/utils.py b/utils.py
new file mode 100644
index 0000000..5241ccc
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,180 @@
+#----------------- IMPORTS -----------------+
+import streamlit as st
+from apikey import apikey
+from prompts import *
+import time
+
+from langchain.llms import OpenAI
+import os
+import re
+import openai
+from transformers import pipeline # Import pipeline from transformers
+from moviepy.editor import VideoFileClip
+from streamlit_player import st_player
+from langchain.tools import YouTubeSearchTool
+import requests
+from bs4 import BeautifulSoup
+import pytesseract
+from PIL import Image
+
+#--- SET API KEYS AND ENVIRONMENT VARIABLES ------------+
+# st.set_page_config(
+# page_title="Smart With Sebi - USER",
+# page_icon=":detective:",
+# layout="wide",
+# )
+
+os.environ["OPENAI_API_KEY"] = apikey
+openai.api_key = apikey
+
+
+
+# ------ Function to transcribe audio -----------------+
+def transcribe_audio(audio_file):
+    """Transcribe *audio_file* with OpenAI's "whisper-1" model and return the text."""
+    result = openai.Audio.transcribe("whisper-1", audio_file)
+    text = result['text']
+    return text
+
+# ------ Sentiment Analyzer -----------------+
+@st.cache_resource
+def _get_sentiment_pipeline():
+    """Build the Hugging Face sentiment-analysis pipeline once and reuse it."""
+    return pipeline("sentiment-analysis")
+
+
+def sentiment_analyzer(text):
+    """Return the sentiment label the pipeline assigns to *text*.
+
+    The pipeline is cached, because constructing it on every call reloads
+    the model (this function is called twice per claim).
+    """
+    return _get_sentiment_pipeline()(text)[0]['label']
+
+# ------ Financial Point Extractor -----------------+
+def financial_point_extractor(prompt):
+    """Ask the LLM for the financial terms contained in *prompt*.
+
+    The extraction instructions in ``financial_point_prompt`` are appended to
+    the text, and the raw completion string is returned.
+    """
+    model = OpenAI(temperature=0.9)
+    query = prompt + financial_point_prompt
+    return model(query)
+
+# ------ Video Link Extractor -----------------+
+def get_video_links(financial_points):
+    """Search YouTube for one explainer video per financial concept.
+
+    Args:
+        financial_points: comma-separated concepts (raw LLM output).
+
+    Returns:
+        A set of absolute YouTube URLs. Concepts that are blank, or whose
+        search yields nothing usable, contribute no links.
+    """
+    import ast  # only needed to parse the tool's stringified list
+
+    tool = YouTubeSearchTool()
+    # Keep letters, commas and spaces only, so stray punctuation from the
+    # LLM does not leak into the search query.
+    cleaned = re.sub(r'[^a-zA-Z, ]', '', financial_points)
+    all_links = set()
+    for concept in cleaned.split(","):
+        concept = concept.strip()
+        if not concept:
+            continue
+        # The tool takes the result count inside the query string
+        # ("query,count"); a second positional argument to run() is not the count.
+        search_results = tool.run(f"Investopedia: {concept},1")
+        try:
+            found = ast.literal_eval(search_results)
+        except (ValueError, SyntaxError):
+            continue
+        if not isinstance(found, (list, tuple)):
+            continue
+        for link in found:
+            if not link:
+                continue
+            # Depending on the library version a result is either a URL
+            # suffix ("/watch?v=...") or already a full URL.
+            if link.startswith("http"):
+                all_links.add(link)
+            else:
+                all_links.add(f"https://www.youtube.com{link}")
+
+    return all_links
+
+# ------ Display Video Links -----------------+
+def display_video_links(finance_points):
+    """Show an expander with an embedded player for each suggested video."""
+    links = get_video_links(finance_points)
+
+    st.write("**VIDEO LINKS**")
+    expander = st.expander("📹Video Links📹")
+    with expander:
+        for url in links:
+            st_player(url)
+
+# ------ Display Articles -----------------+
+def get_articles(financial_points, num_articles=2):
+    """Collect (title, link) pairs from a Google search per concept.
+
+    Args:
+        financial_points: either a comma-separated string of concepts (the
+            form the extractor returns) or an iterable of concept strings.
+        num_articles: number of results requested per concept.
+
+    Returns:
+        A list of (title, link) tuples; results missing a title or link are
+        skipped.
+    """
+    # A plain string would otherwise be iterated character by character.
+    if isinstance(financial_points, str):
+        concepts = financial_points.split(",")
+    else:
+        concepts = financial_points
+
+    articles = []
+    for concept in concepts:
+        concept = concept.strip()
+        if not concept:
+            continue
+        query = f"Explain the concept: {concept}"
+        # Passing params lets requests URL-encode the query; the timeout
+        # keeps a stalled request from hanging the page.
+        response = requests.get(
+            "https://www.google.com/search",
+            params={"q": query, "num": num_articles},
+            timeout=10,
+        )
+        soup = BeautifulSoup(response.content, "html.parser")
+
+        for result in soup.find_all("div", class_="tF2Cxc"):
+            anchor = result.find("a")
+            heading = result.find("h3")
+            if anchor is None or heading is None or not anchor.get("href"):
+                continue
+            articles.append((heading.get_text(), anchor["href"]))
+
+    return articles
+
+def display_articles(financial_points):
+    """Show an expander listing the related articles as markdown links."""
+    found = get_articles(financial_points)
+    st.write("**RELEVANT ARTICLES**")
+    expander = st.expander("📚 Relevant Articles 📚")
+    with expander:
+        for heading, href in found:
+            st.markdown(f"[{heading}]({href})")
+
+# ------ Display SEBI Rules -----------------+
+def display_sebi_rules(financial_points):
+ # Asks the LLM for SEBI rules for each comma-separated financial point and
+ # writes the answers to the page.
+ # NOTE(review): the fixed 21 s pauses are presumably there to stay under an API rate limit; confirm.
+ time.sleep(21)
+ #Extract strictly financial terms
+ financial_points= re.sub(r'[^a-zA-Z, ]', '', financial_points)
+ llm = OpenAI(temperature=0.9)
+
+
+ # st.write(f"length: {len(financial_points.split(','))}")
+ for point in financial_points.split(","):
+ time.sleep(21)
+
+ # NOTE(review): the markdown strings here span two lines and look like they lost their HTML; confirm against the original file.
+ st.markdown("
", unsafe_allow_html=True)
+ st.write(f"**SEBI rules for {point.upper()}:**")
+ # The rules prompt is appended after the point, which it refers to as "the above topic".
+ fin_rules = llm(point + sebi_rules_prompt)
+ st.write(fin_rules)
+
+ st.markdown("
", unsafe_allow_html=True)
+# ------ IMAGE to TEXT -----------------+
+def get_text_from_image(image_file):
+    """Run Tesseract OCR on *image_file* and return the recognised text."""
+    return pytesseract.image_to_string(Image.open(image_file))
+
+
+# ------ CLAIM DETECTION PIPELINE -----------------+
+
+def claim_detection_pipeline(prompt):
+    """Judge a claim with the LLM and display the result.
+
+    Writes the sentiment of the input, then the model's verdict (the input
+    followed by ``claim_prompt``) and the sentiment of that verdict.
+
+    Returns:
+        The raw model response string.
+    """
+    input_sentiment = sentiment_analyzer(prompt)
+    st.write("Transcribed Input Sentiment:", input_sentiment)
+
+    st.write("Model Output:")
+    model = OpenAI(temperature=0.9)
+    response = model(prompt + claim_prompt)
+    # The verdict's sentiment is computed before anything is displayed,
+    # so a failure here shows no partial output.
+    output_sentiment = sentiment_analyzer(response)
+
+    st.write(response)
+    st.write("Model Output Sentiment:", output_sentiment)
+
+    return response
+
+
+# ------ QUESTION ANSWERING -----------------+
+def question_and_answer(prompt):
+    """Send the user's question to the LLM and write the answer to the page."""
+    model = OpenAI(temperature=0.9)
+    answer = model(prompt + "\nAnswer the above question.")
+    st.write(answer)
+
+# ------ VIDEO to TEXT -----------------------------+
+# ---- audio detachment from video ----+
+def detach_audio(video_file):
+    """Extract the audio track of a video into "audio.mp3".
+
+    Args:
+        video_file: path of the video on disk.
+
+    Returns:
+        The extracted audio opened in binary read mode; the caller must
+        close it.
+
+    Raises:
+        ValueError: if the video has no audio track.
+    """
+    videoclip = VideoFileClip(video_file)
+    try:
+        audio_clip = videoclip.audio
+        if audio_clip is None:
+            raise ValueError("The uploaded video has no audio track to transcribe.")
+        try:
+            audio_clip.write_audiofile("audio.mp3")
+        finally:
+            audio_clip.close()
+    finally:
+        # Release the clip even when extraction fails.
+        videoclip.close()
+
+    return open("audio.mp3", "rb")
+
+def video_to_text(video_file):
+    """Transcribe the speech in an uploaded video.
+
+    The upload is saved as "uploaded_video.mp4", its audio track is
+    extracted and transcribed, and the transcript text is returned.
+    """
+    with open("uploaded_video.mp4", "wb") as f:
+        f.write(video_file.read())
+
+    # The context manager closes the audio file even if transcription raises.
+    with detach_audio("uploaded_video.mp4") as audio_file:
+        return transcribe_audio(audio_file)
+
+# --------------------- END ---------------------------------+
+
+
\ No newline at end of file