diff --git a/Fincred-verify/.gitignore b/Fincred-verify/.gitignore
new file mode 100644
index 0000000..9871c22
--- /dev/null
+++ b/Fincred-verify/.gitignore
@@ -0,0 +1,2 @@
+videos/
+__pycache__/
\ No newline at end of file
diff --git a/Fincred-verify/__init__.py b/Fincred-verify/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Fincred-verify/celery_config.py b/Fincred-verify/celery_config.py
new file mode 100644
index 0000000..8db5703
--- /dev/null
+++ b/Fincred-verify/celery_config.py
@@ -0,0 +1,16 @@
+
+from celery import Celery
+from config import settings
+
+# Celery app shared by the FastAPI process and the worker; the tasks live in get_youtube.
+celery = Celery(
+    'tasks',
+    broker=settings.CELERY_BROKER_URL,
+    backend=settings.CELERY_RESULT_BACKEND,
+    include=['get_youtube']
+)
+
+celery.conf.update(
+    task_serializer='json',
+    result_serializer='json',
+    accept_content=['json'],
+)
diff --git a/Fincred-verify/config.py b/Fincred-verify/config.py
new file mode 100644
index 0000000..af56a5c
--- /dev/null
+++ b/Fincred-verify/config.py
@@ -0,0 +1,13 @@
+import os
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+
+class Config:
+    CELERY_BROKER_URL: str = os.environ.get("CELERY_BROKER_URL", "redis://127.0.0.1:6379/0")
+    CELERY_RESULT_BACKEND: str = os.environ.get("CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0")
+
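+# Example .env consumed by load_dotenv() above (illustrative values; point these at your own Redis instance):
+#   CELERY_BROKER_URL=redis://127.0.0.1:6379/0
+#   CELERY_RESULT_BACKEND=redis://127.0.0.1:6379/0
+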
+settings = Config()
\ No newline at end of file
diff --git a/Fincred-verify/get_youtube.py b/Fincred-verify/get_youtube.py
new file mode 100644
index 0000000..c4ee903
--- /dev/null
+++ b/Fincred-verify/get_youtube.py
@@ -0,0 +1,196 @@
+
+import openai
+import requests
+from pytube import YouTube
+import subprocess
+from celery_config import celery
+from celery import current_task
+from test_recog import get_transcript_custom
+import os
+
+# API keys are read from the environment (variable names chosen here); never commit real keys to source control.
+api_key = os.environ.get("YOUTUBE_API_KEY")
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+
+
+class CaptionsError(Exception):
+    """Custom exception for errors related to fetching captions."""
+    def __init__(self, message="Error fetching captions"):
+        self.message = message
+        super().__init__(self.message)
+
+
+class RatingError(Exception):
+    """Custom exception for errors related to fetching rating."""
+    def __init__(self, message="Error fetching rating"):
+        self.message = message
+        super().__init__(self.message)
+
+
+@celery.task
+def get_influencer_and_channel_details(video_id):
+    # Step 1: Get the channelId using the Videos.list API call
+    video_api_url = f'https://www.googleapis.com/youtube/v3/videos?id={video_id}&key={api_key}&part=snippet'
+    video_response = requests.get(video_api_url).json()
+    if not video_response.get('items'):
+        return "Invalid video link or video not found"
+
+    channel_id = video_response['items'][0]['snippet']['channelId']
+
+    # Step 2: Get channel details using the Channels.list API call
+    channel_api_url = f'https://www.googleapis.com/youtube/v3/channels?id={channel_id}&key={api_key}&part=snippet,statistics'
+    channel_response = requests.get(channel_api_url).json()
+    if not channel_response.get('items'):
+        return "Channel details not found"
+
+    channel_data = channel_response['items'][0]
+    channel_snippet = channel_data['snippet']
+
+    # Extracting details
+    channel_title = channel_snippet.get('title', '')
+    channel_description = channel_snippet.get('description', '')
+    channel_published_at = channel_snippet.get('publishedAt', '')
+    channel_subscriber_count = channel_data['statistics'].get('subscriberCount', '')
+
+    return {
+        "channel_title": channel_title,
+        "channel_description": channel_description,
+        "channel_published_at": channel_published_at,
+        "channel_subscriber_count": channel_subscriber_count
+    }
+
+
+def get_captions(id):
+    # captions.list only returns caption-track metadata (downloading caption text requires OAuth),
+    # which is why the pipeline below falls back to transcribing the audio.
+    url = f"https://www.googleapis.com/youtube/v3/captions?videoId={id}&key={api_key}&part=snippet"
+    response = requests.get(url)
+    response_json = response.json()
+    items = response_json.get("items")
+    if items:
+        caption_track_kind = items[0]['snippet']['trackKind']
+        if caption_track_kind:
+            return caption_track_kind
+        return None
+    else:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+
+def get_comments(id):
+    url = f"https://www.googleapis.com/youtube/v3/commentThreads?videoId={id}&key={api_key}&part=snippet"
+    response = requests.get(url)
+    response_json = response.json()
+    items = response_json.get("items")
+    if items:
+        comments = []
+        for item in items:
+            comment = item['snippet']['topLevelComment']['snippet']['textOriginal']
+            comments.append(comment)
+        return comments
+    else:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+
+def get_title(id):
+    url = f"https://www.googleapis.com/youtube/v3/videos?id={id}&key={api_key}&part=snippet"
+    response = requests.get(url)
+    response_json = response.json()
+    items = response_json.get("items")
+    if items:
+        title = items[0]['snippet']['title']
+        if title:
+            return title
+        return None
+    else:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+
+def get_activities(id):
+    url = f"https://www.googleapis.com/youtube/v3/activities?channelId={id}&key={api_key}&part=snippet"
+    response = requests.get(url)
+    response_json = response.json()
+    items = response_json.get("items")
+    if items:
+        activities = []
+        for item in items:
+            activity = item['snippet']['title']
+            activities.append(activity)
+        return activities
+    else:
+        print("Couldn't retrieve channel activities. Ensure the channel ID is correct and you have quota on the API.")
+        return None
+
+
+def get_rating(id):
+    # Note: videos.getRating requires OAuth credentials; an API key alone is not sufficient.
+    url = f"https://www.googleapis.com/youtube/v3/videos/getRating?id={id}&key={api_key}"
+    response = requests.get(url)
+    response_json = response.json()
+    items = response_json.get("items")
+    if items:
+        rating = items[0].get('rating')
+        if rating:
+            return rating
+        return None
+    else:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+
+def download_youtube(id, task_id):
+    print(f"Downloading YouTube video for Task ID: {task_id}")
+    try:
+        download_dir = "videos"
+        yt = YouTube('https://youtube.com/watch?v=' + id)
+        video_stream = yt.streams.filter(res="360p").first()
+        video_stream.download(output_path=download_dir)
+        video_filename = video_stream.default_filename
+        audio_filename = os.path.splitext(video_filename)[0] + ".wav"
+        # Extract the audio track with ffmpeg so it can be sent for transcription.
+        subprocess.run(["ffmpeg", "-i", os.path.join(download_dir, video_filename), "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2", os.path.join(download_dir, audio_filename)])
+        transcript = get_transcript_custom(os.path.join(download_dir, audio_filename))
+    except Exception as e:
+        print(f"Error in Task ID {task_id}: {e}")
+        return None
+    return transcript
+
+
+@celery.task
+def handle_get_youtube(id):
+    """Gather a video's metadata, transcribe its audio, and return everything in one dict."""
+    title = None
+    comments = None
+    influencer_and_channel_details = None
+    captions = None  # Caption download needs OAuth, so the pipeline always falls back to transcription.
+    download_result = None
+    try:
+        title = get_title(id)
+        comments = get_comments(id)
+        influencer_and_channel_details = get_influencer_and_channel_details(id)
+        # rating = get_rating(id)
+    except Exception as e:
+        print(e)
+    if captions is None:
+        print("No captions available, downloading video to transcribe")
+        task_id = current_task.request.id
+        download_result = download_youtube(id, task_id)
+        print("Transcribed audio")
+    return {"title": title, "comments": comments, "captions": captions, "special": download_result, "influencer_and_channel_details": influencer_and_channel_details}
+
+
+# Example usage:
+# video_id = 'VIDEO_ID'
+# print(get_title(video_id))
+# print(get_influencer_and_channel_details(video_id))
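+#
+# Illustrative only (assumes the Redis broker and a Celery worker for this app are running):
+# the whole pipeline can also be enqueued asynchronously and polled for its result, e.g.
+#   result = handle_get_youtube.delay(video_id)
+#   print(result.get(timeout=600))  # dict with title, comments, transcript ("special") and channel details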
+
+
+if __name__ == "__main__":
+    download_youtube("3VSG0S08-C0", "bd18b175-543e-4d81-a43f-5bc08cb8ca7c")
+    # print(get_captions("3VSG0S08-C0"))
+    # print(get_comments("3VSG0S08-C0"))
+    # print(get_title("3VSG0S08-C0"))
+    # print(get_activities("UCBR8-60-B28hp2BmDPdntcQ"))
+    # print(get_rating("3VSG0S08-C0"))
\ No newline at end of file
diff --git a/Fincred-verify/langchain-analytics/chains.py b/Fincred-verify/langchain-analytics/chains.py
new file mode 100644
index 0000000..a99c2ca
--- /dev/null
+++ b/Fincred-verify/langchain-analytics/chains.py
@@ -0,0 +1,73 @@
+import os
+
+from langchain.llms import OpenAI
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain, SequentialChain
+from langchain.agents import AgentType, initialize_agent
+from langchain.tools import DuckDuckGoSearchRun
+
+search = DuckDuckGoSearchRun()
+
+# The OpenAI key is read from the OPENAI_API_KEY environment variable.
+api_key = os.getenv('OPENAI_API_KEY')
+
+if api_key:
+    print('OpenAI API key found.')
+else:
+    print('API key not found. Please set the OPENAI_API_KEY environment variable.')
+
+llm = OpenAI(temperature=0.7, openai_api_key=api_key)
+
+summ_template = """
+You are a fact-checker and analyst. Given a text transcript, your task is to provide a concise summary of its main points while emphasizing any potential fakeness or false claims.
+
+Text Transcript: {transcript}
+
+Fact-Checker's Summary:
+"""
+sum_prompt_template = PromptTemplate(input_variables=["transcript"], template=summ_template)
+summary_chain = LLMChain(llm=llm, prompt=sum_prompt_template, output_key="summary")
+
+com_template = """
+You are a comment analyst. Given a set of comments, your task is to provide a concise summary of the main points while emphasizing any potential fakeness or false claims in the comments. Additionally, assess whether the audience appears to be informed and if they have added relevant context to their comments.
+
+Comments:
+{comments}
+
+Comment Analyst's Summary:
+"""
+com_prompt_template = PromptTemplate(input_variables=["comments"], template=com_template)
+comment_chain = LLMChain(llm=llm, prompt=com_prompt_template, output_key="comment_summary")
+
+simplify_chain = SequentialChain(
+    chains=[summary_chain, comment_chain],
+    input_variables=["comments", "transcript"],
+    output_variables=["comment_summary", "summary"],
+    verbose=True)
+
+
+tools = [search]
+agent = initialize_agent(tools,
+                         llm,
+                         agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+                         verbose=True)
+
+
+def get_Insights(transcript, comments):
+    return simplify_chain({"comments": comments, "transcript": transcript})
+
+
+def run_agent(transcript):
+    return agent.run("Given the provided text transcript, please perform the following tasks:\n\n"
+                     "1. Identify any claims made in the text.\n"
+                     "2. Conduct a search to gather insights related to the claims.\n"
+                     "3. Warn about potential risks associated with the content that are not discussed in the text.\n"
+                     "4. Provide an assessment of the benefits mentioned in the text.\n\n"
+                     "Return the following:\n"
+                     "- A list of identified claims.\n"
+                     "- Insights gathered from the search.\n"
+                     "- Warnings about potential risks.\n"
+                     "- An assessment of benefits mentioned in the text.\n\n"
+                     + transcript)
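+
+# Illustrative usage (not part of the original module): assumes OPENAI_API_KEY is set and that a transcript
+# and a list of comments have already been fetched elsewhere, e.g. by get_youtube.handle_get_youtube:
+#   insights = get_Insights(transcript="...transcript text...", comments=["comment 1", "comment 2"])
+#   report = run_agent("...transcript text...")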
diff --git a/Fincred-verify/main.py b/Fincred-verify/main.py
new file mode 100644
index 0000000..296ccaf
--- /dev/null
+++ b/Fincred-verify/main.py
@@ -0,0 +1,35 @@
+from fastapi import FastAPI
+from celery.result import AsyncResult
+
+from celery_config import celery
+from get_youtube import handle_get_youtube
+
+app = FastAPI()
+
+
+@app.get("/")
+async def root():
+    return {"message": "Welcome to FinCredVerify!"}
+
+
+@app.get("/analyze_video/{video_id}")
+async def analyze_video(video_id: str):
+    # Enqueue the Celery task and hand back its id so the client can poll for the result.
+    task = handle_get_youtube.apply_async(args=[video_id])
+    return {"message": "Analyzing video... (Task ID: {})".format(task.id)}
+
+
+@app.get("/task_status/{task_id}")
+async def get_task_status(task_id: str):
+    result = AsyncResult(task_id, app=celery)
+    if result.ready():
+        result_value = result.result
+        return {"message": "Video analysis completed", "result": result_value}
+    else:
+        return {"message": "Video analysis in progress"}
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, port=8000)
+
diff --git a/Fincred-verify/requirements.txt b/Fincred-verify/requirements.txt
new file mode 100644
index 0000000..371aee6
--- /dev/null
+++ b/Fincred-verify/requirements.txt
@@ -0,0 +1,71 @@
+aiohttp==3.8.5
+aiosignal==1.3.1
+amqp==5.1.1
+annotated-types==0.5.0
+anyio==3.7.1
+assemblyai==0.17.0
+async-timeout==4.0.3
+attrs==23.1.0
+billiard==4.1.0
+celery==5.3.1
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.6
+click-didyoumean==0.3.0
+click-plugins==1.1.1
+click-repl==0.3.0
+dnspython==2.4.2
+email-validator==2.0.0.post2
+exceptiongroup==1.1.3
+fastapi==0.101.1
+filelock==3.12.2
+frozenlist==1.4.0
+fsspec==2023.6.0
+h11==0.14.0
+httpcore==0.17.3
+httptools==0.6.0
+httpx==0.24.1
+idna==3.4
+itsdangerous==2.1.2
+Jinja2==3.1.2
+kombu==5.3.1
+MarkupSafe==2.1.3
+multidict==6.0.4
+numpy==1.25.2
+openai==0.27.8
+orjson==3.9.4
+packaging==23.1
+prompt-toolkit==3.0.39
+pydantic==2.1.1
+pydantic-extra-types==2.0.0
+pydantic-settings==2.0.3
+pydantic_core==2.4.0
+python-dateutil==2.8.2
+python-dotenv==1.0.0
+python-multipart==0.0.6
+pytube==15.0.0
+PyYAML==6.0.1
+redis==5.0.0
+regex==2023.8.8
+requests==2.31.0
+safetensors==0.3.2
+scipy==1.11.1
+six==1.16.0
+sniffio==1.3.0
+SpeechRecognition==3.10.0
+starlette==0.27.0
+tokenizers==0.13.3
+tqdm==4.66.1
+typing_extensions==4.7.1
+tzdata==2023.3
+ujson==5.8.0
+urllib3==2.0.4
+uvicorn==0.23.2
+uvloop==0.17.0
+vine==5.0.0
+watchfiles==0.19.0
+wcwidth==0.2.6
+websockets==11.0.3
+yarl==1.9.2
+langchain
+duckduckgo-search
diff --git a/Fincred-verify/test_recog.py b/Fincred-verify/test_recog.py
new file mode 100644
index 0000000..cb8d410
--- /dev/null
+++ b/Fincred-verify/test_recog.py
@@ -0,0 +1,21 @@
+import os
+import assemblyai as aai
+
+
+def get_transcript_custom(audio_filename):
+    """Transcribe a local audio file with AssemblyAI and return the transcript text."""
+    if not os.path.exists(audio_filename):
+        print("Couldn't find audio file. Ensure the audio file is in the same directory as this script.")
+        return None
+    # Set your AssemblyAI API key via the environment; never hard-code it in source control.
+    aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY")
+    transcriber = aai.Transcriber()
+
+    transcript = transcriber.transcribe(audio_filename)
+
+    print("Transcript:")
+    print(transcript.text)
+    with open("transcript.txt", "w") as f:
+        f.write(transcript.text)
+    return transcript.text
diff --git a/Fincred-verify/texts/.ssh b/Fincred-verify/texts/.ssh
new file mode 100644
index 0000000..e69de29
diff --git a/Fincred-verify/transcript.txt b/Fincred-verify/transcript.txt
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
index d23b4aa..4c552dd 100644
--- a/README.md
+++ b/README.md
@@ -9,18 +9,36 @@
 
 ## README.md must consist of the following information:
 
-#### Team Name -
-#### Problem Statement -
-#### Team Leader Email -
+#### Team Name - FinCredVerify
+#### Problem Statement - Identifying Misleading Claims
+#### Team Leader Email - pranjalkar99.work@gmail.com
 
 ## A Brief of the Prototype:
-  This section must include UML Diagrams and prototype description
-
+  The idea is to build a user-friendly website where users paste the link of a YouTube/Instagram video, Short, etc. The website will use APIs to gather essential data, including text transcripts, captions, influencer profiles, and comments. A Langchain-powered agent will analyze the text and other data using Language Models (LLMs). Based on the analysis, the AI model will assign a score and classify the content as verified or not. The aim is to streamline the verification process using advanced technology and provide users with reliable insights.
+
+## Business Opportunity ##
+**Streamlined Verification:**
+Simplify content verification by allowing users to input video links, leveraging advanced Language Models for accurate analysis, and delivering clear authenticity scores, empowering users to make informed decisions.
+**Freemium Model & Partnerships:**
+Offer basic verification for free, enticing users to upgrade to a premium plan for advanced insights. Forge partnerships with brands and agencies for sponsored content validation, creating revenue streams.
+**Data-Driven Growth:**
+Utilize user data for continuous AI model improvement and generate valuable insights. Offer reports and analyses for sale, potentially partnering with social media platforms for seamless integration and expanding market reach.
+
 ## Tech Stack:
-   List Down all technologies used to Build the prototype
-
+  - FastAPI, for building the backend API
+  - Vue.js, for building the frontend
+  - TensorFlow, NLTK, Hugging Face: for building the comment-section sentiment analyzer
+  - Langchain: for chaining the LLM with search agents for fact-checking, as well as a retrieval agent over SEBI documents
+  - LLM models for analysis, e.g. OpenAI, Cohere, PaLM
+  - YouTube and Instagram APIs for fetching the video, text, captions, and other content from the URL
 
 ## Step-by-Step Code Execution Instructions:
   This Section must contain a set of instructions required to clone and run the prototype so that it can be tested and deeply analyzed
-
+  `python -m venv env`
+  `source env/bin/activate`
+  `pip install -r requirements.txt`
+  `uvicorn main:app --reload`
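+
+  Note (suggested additions, not in the original instructions): the API hands analysis off to Celery, so a Redis
+  broker and a worker must also be running, `ffmpeg` must be available on the PATH for audio extraction, and the
+  API keys (`YOUTUBE_API_KEY`, `OPENAI_API_KEY`, `ASSEMBLYAI_API_KEY`) should be set as environment variables or
+  in a `.env` file. A typical local setup might be:
+  `redis-server`
+  `celery -A celery_config.celery worker --loglevel=info`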
+
+
 ## What I Learned:
-  Write about the biggest learning you had while developing the prototype
+  Building LLM applications with OpenAI and Langchain, and chaining agents to build Generative AI applications.
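+
+## Example API Usage ##
+  A minimal illustration of querying the running service (assuming the default port 8000; replace VIDEO_ID and TASK_ID with real values):
+  `curl http://127.0.0.1:8000/analyze_video/VIDEO_ID`
+  `curl http://127.0.0.1:8000/task_status/TASK_ID`
+  The first call returns a Task ID; the second returns the analysis result once the Celery task has finished.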