hack2skill · pranjalkar99 · Aug 24, 2023 · Aug 24, 2023 · Aug 24, 2023 · Aug 24, 2023
diff --git a/Fincred-verify/.gitignore b/Fincred-verify/.gitignore
@@ -0,0 +1,2 @@
+videos/
+__pycache__/
diff --git a/Fincred-verify/__init__.py b/Fincred-verify/__init__.py
diff --git a/Fincred-verify/celery_config.py b/Fincred-verify/celery_config.py
@@ -0,0 +1,16 @@
+
+from celery import  Celery
+from config import settings
+
+# Celery application object imported by the task module and the API.
+# Broker and result-backend URLs come from `settings`; `include` names the
+# module that defines the tasks.
+celery = Celery(
+    'tasks',
+    include=['get_youtube'],
+    backend=settings.CELERY_RESULT_BACKEND,
+    broker=settings.CELERY_BROKER_URL,
+)
+
+# Exchange task payloads and results as JSON only.
+celery.conf.update(
+    accept_content=['json'],
+    result_serializer='json',
+    task_serializer='json',
+)
diff --git a/Fincred-verify/config.py b/Fincred-verify/config.py
@@ -0,0 +1,13 @@
+import os
+from dotenv import load_dotenv
+
+
+load_dotenv()
+
+
+class Config:
+    """Celery connection settings read from environment variables.
+
+    Each value falls back to ``redis://127.0.0.1:6379/0`` when the
+    corresponding environment variable is not set. The lookups run once,
+    when the class body is executed.
+    """
+
+    CELERY_BROKER_URL: str = os.environ.get(
+        "CELERY_BROKER_URL", "redis://127.0.0.1:6379/0"
+    )
+    CELERY_RESULT_BACKEND: str = os.environ.get(
+        "CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0"
+    )
+
+
+# Module-level instance that other modules import.
+settings = Config()
diff --git a/Fincred-verify/get_youtube.py b/Fincred-verify/get_youtube.py
@@ -0,0 +1,196 @@
+
+import openai
+import requests
+import asyncio
+from pytube import YouTube
+import subprocess
+from celery_config import celery
+from celery.result import AsyncResult
+from celery import current_task, Celery
+from test_recog import get_transcript_custom
+import speech_recognition as sr
+import os
+# Credentials are read from the environment instead of being committed to
+# source control. The keys that were previously hard-coded here must be
+# treated as leaked and revoked.
+# YOUTUBE_API_KEY: key for the YouTube Data API calls made in this module.
+# OPENAI_API_KEY: key handed to the openai client (None when unset).
+api_key = os.environ.get("YOUTUBE_API_KEY", "")
+openai.api_key = os.environ.get("OPENAI_API_KEY")
+class CaptionsError(Exception):
+    """Raised when the captions of a video cannot be fetched."""
+
+    def __init__(self, message="Error fetching captions"):
+        super().__init__(message)
+        # Also expose the text as an attribute, not only through ``args``.
+        self.message = message
+
+class RatingError(Exception):
+    """Raised when the rating of a video cannot be fetched."""
+
+    def __init__(self, message="Error fetching rating"):
+        super().__init__(message)
+        # Also expose the text as an attribute, not only through ``args``.
+        self.message = message
+@celery.task
+def get_influencer_and_channel_details(video_id):
+    """Look up the channel that published a video and return its details.
+
+    Two YouTube Data API requests are made: ``videos`` to resolve the
+    video's ``channelId``, then ``channels`` for snippet and statistics.
+
+    Args:
+        video_id: YouTube video id.
+
+    Returns:
+        A dict with ``channel_title``, ``channel_description``,
+        ``channel_published_at`` and ``channel_subscriber_count`` (each an
+        empty string when the field is missing), or an explanatory string
+        when the video or the channel is not found in the response.
+    """
+    # Pass query values through ``params`` so requests URL-encodes the
+    # caller-supplied id, and bound each call with a timeout so a stalled
+    # connection cannot block the worker indefinitely.
+    video_response = requests.get(
+        'https://www.googleapis.com/youtube/v3/videos',
+        params={'id': video_id, 'key': api_key, 'part': 'snippet'},
+        timeout=30,
+    ).json()
+    if not video_response.get('items'):
+        return "Invalid video link or video not found"
+
+    channel_id = video_response['items'][0]['snippet']['channelId']
+
+    channel_response = requests.get(
+        'https://www.googleapis.com/youtube/v3/channels',
+        params={
+            'id': channel_id,
+            'key': api_key,
+            'part': 'snippet,statistics',
+        },
+        timeout=30,
+    ).json()
+    if not channel_response.get('items'):
+        return "Channel details not found"
+
+    channel_data = channel_response['items'][0]
+    # Tolerate a missing section instead of raising KeyError.
+    channel_snippet = channel_data.get('snippet', {})
+    channel_statistics = channel_data.get('statistics', {})
+
+    return {
+        "channel_title": channel_snippet.get('title', ''),
+        "channel_description": channel_snippet.get('description', ''),
+        "channel_published_at": channel_snippet.get('publishedAt', ''),
+        "channel_subscriber_count": channel_statistics.get('subscriberCount', ''),
+    }
+
+def get_captions(id):
+    """Return the ``trackKind`` of the first caption track listed for a video.
+
+    Despite the name, no caption text is downloaded here; only the
+    ``snippet.trackKind`` value of the first item is returned.
+
+    Args:
+        id: YouTube video id.
+
+    Returns:
+        The ``trackKind`` string, or ``None`` when it is empty or the
+        response contains no items.
+    """
+    # ``params`` URL-encodes the id; ``timeout`` keeps a stalled request
+    # from hanging the caller.
+    response = requests.get(
+        "https://www.googleapis.com/youtube/v3/captions",
+        params={"videoId": id, "key": api_key, "part": "snippet"},
+        timeout=30,
+    )
+    items = response.json().get("items")
+    if not items:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+    track_kind = items[0]['snippet']['trackKind']
+    return track_kind or None
+
+def get_comments(id):
+    """Return the text of the top-level comments on a video.
+
+    A single ``commentThreads`` request is made; no pagination is done.
+
+    Args:
+        id: YouTube video id.
+
+    Returns:
+        A list with the ``textOriginal`` of each returned top-level comment,
+        or ``None`` when the response contains no items.
+    """
+    # ``params`` URL-encodes the id; ``timeout`` keeps a stalled request
+    # from hanging the caller.
+    response = requests.get(
+        "https://www.googleapis.com/youtube/v3/commentThreads",
+        params={"videoId": id, "key": api_key, "part": "snippet"},
+        timeout=30,
+    )
+    items = response.json().get("items")
+    if not items:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+    return [
+        item['snippet']['topLevelComment']['snippet']['textOriginal']
+        for item in items
+    ]
+
+def get_title(id):
+    """Return the title of a video.
+
+    Args:
+        id: YouTube video id.
+
+    Returns:
+        The ``snippet.title`` of the first returned item, or ``None`` when
+        the title is empty or the response contains no items.
+    """
+    # ``params`` URL-encodes the id; ``timeout`` keeps a stalled request
+    # from hanging the caller.
+    response = requests.get(
+        "https://www.googleapis.com/youtube/v3/videos",
+        params={"id": id, "key": api_key, "part": "snippet"},
+        timeout=30,
+    )
+    items = response.json().get("items")
+    if not items:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+    title = items[0]['snippet']['title']
+    return title or None
+def get_activities(id):
+    """Return the titles of a channel's listed activities.
+
+    Args:
+        id: YouTube channel id (sent as the ``channelId`` query parameter).
+
+    Returns:
+        A list with the ``snippet.title`` of each returned activity, or
+        ``None`` when the response contains no items.
+    """
+    # ``params`` URL-encodes the id; ``timeout`` keeps a stalled request
+    # from hanging the caller.
+    response = requests.get(
+        "https://www.googleapis.com/youtube/v3/activities",
+        params={"channelId": id, "key": api_key, "part": "snippet"},
+        timeout=30,
+    )
+    items = response.json().get("items")
+    if not items:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+    return [item['snippet']['title'] for item in items]
+
+def get_rating(id):
+    """Return the rating reported by ``videos/getRating`` for a video.
+
+    Args:
+        id: YouTube video id.
+
+    Returns:
+        The ``rating`` value of the first returned item, or ``None`` when it
+        is empty or the response contains no items.
+    """
+    # NOTE(review): the API reference lists this endpoint as requiring user
+    # authorization; with only an API key it presumably returns an error
+    # body, in which case this function returns None - confirm.
+    # ``part`` is not a documented parameter of getRating, so it is not sent.
+    response = requests.get(
+        "https://www.googleapis.com/youtube/v3/videos/getRating",
+        params={"id": id, "key": api_key},
+        timeout=30,
+    )
+    items = response.json().get("items")
+    if not items:
+        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
+        return None
+
+    # getRating items are {"videoId": ..., "rating": ...}; there is no
+    # nested "snippet" object, so read the rating from the top level.
+    rating = items[0].get('rating')
+    return rating or None
+
+def download_youtube(id, task_id):
+    """Download a video, extract its audio with ffmpeg and transcribe it.
+
+    The 360p stream is saved under ``videos/``, converted to a 44.1 kHz
+    stereo 16-bit PCM ``.wav`` next to it, and the wav path is passed to
+    ``get_transcript_custom``.
+
+    Args:
+        id: YouTube video id.
+        task_id: Identifier used only in the progress/error messages.
+
+    Returns:
+        Whatever ``get_transcript_custom`` returns, or ``None`` when any
+        step fails (the error is printed, not raised).
+    """
+    print(f"Downloading YouTube video for Task ID: {task_id}")
+    download_dir = "videos"
+    try:
+        yt = YouTube('http://youtube.com/watch?v=' + id)
+        video_stream = yt.streams.filter(res="360p").first()
+        if video_stream is None:
+            # first() yields None when no stream matches the filter.
+            raise ValueError(f"No 360p stream available for video {id}")
+
+        # download() returns the path it actually wrote; use that rather
+        # than rebuilding the path from default_filename.
+        video_path = video_stream.download(output_path=download_dir)
+        audio_path = os.path.splitext(video_path)[0] + ".wav"
+
+        # -y overwrites an existing wav instead of waiting on an interactive
+        # prompt; check=True turns an ffmpeg failure into an exception
+        # instead of transcribing a missing or partial file.
+        subprocess.run(
+            [
+                "ffmpeg", "-y", "-i", video_path,
+                "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
+                audio_path,
+            ],
+            check=True,
+        )
+        return get_transcript_custom(audio_path)
+    except Exception as e:
+        # Best-effort boundary: report the failure and let the caller
+        # continue without a transcript.
+        print(f"Error in Task ID {task_id}: {e}")
+        return None
+
+
+
+@celery.task
+def handle_get_youtube(id):
+    """Collect metadata for a video and transcribe its audio.
+
+    Args:
+        id: YouTube video id.
+
+    Returns:
+        A dict with the keys ``title``, ``comments``, ``captions`` (always
+        ``None``; caption fetching is not performed), ``special`` (the
+        result of ``download_youtube``) and
+        ``influencer_and_channel_details``. Metadata that could not be
+        fetched is ``None``.
+    """
+    # Bind everything up front so a failure part-way through the metadata
+    # calls cannot leave a name undefined when the result is built.
+    title = None
+    comments = None
+    influencer_and_channel_details = None
+    captions = None
+    try:
+        title = get_title(id)
+        comments = get_comments(id)
+        influencer_and_channel_details = get_influencer_and_channel_details(id)
+    except Exception as e:
+        # Best effort: keep whatever was fetched before the failure.
+        print(e)
+
+    # Captions are never fetched, so the audio is always transcribed.
+    print("Error fetching captions, downloading video to transcribe")
+    # current_task has no request outside a worker (e.g. a direct call);
+    # fall back to None rather than raising AttributeError.
+    task_id = getattr(getattr(current_task, "request", None), "id", None)
+    download_result = download_youtube(id, task_id)
+    print("Transcribed audio, deleting video")
+
+    return {
+        "title": title,
+        "comments": comments,
+        "captions": captions,
+        "special": download_result,
+        "influencer_and_channel_details": influencer_and_channel_details,
+    }
+import requests
+
+
+
+# Example usage (enqueue the analysis task from another module):
+# from get_youtube import handle_get_youtube
+# task = handle_get_youtube.apply_async(args=['VIDEO_ID'])
+# print(task.id)
+
+
+if __name__ == "__main__":
+    # download_youtube is a plain function, not a coroutine, so it is called
+    # directly; asyncio.run() would reject its return value with ValueError.
+    print(download_youtube("3VSG0S08-C0", "bd18b175-543e-4d81-a43f-5bc08cb8ca7c"))
+    # print(get_captions("3VSG0S08-C0"))
+    # print(get_comments("3VSG0S08-C0"))
+    # print(get_title("3VSG0S08-C0"))
+    # # print(get_activities("UCBR8-60-B28hp2BmDPdntcQ"))
+    # print(get_rating("3VSG0S08-C0"))
diff --git a/Fincred-verify/langchain-analytics/chains.py b/Fincred-verify/langchain-analytics/chains.py
@@ -0,0 +1,73 @@
+from langchain.tools import DuckDuckGoSearchRun
+search = DuckDuckGoSearchRun()
+from langchain.prompts import PromptTemplate
+from langchain.chains import SequentialChain
+from langchain.chains import LLMChain
+from langchain.tools import BaseTool
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+from langchain.agents import AgentType
+from langchain.llms import OpenAI
+from langchain.agents import Tool
+from langchain.agents import initialize_agent
+import os
+
+# Read the OpenAI key from the OPENAI_API_KEY environment variable.
+api_key = os.getenv('OPENAI_API_KEY')
+
+# Report only whether the key is present: echoing the key itself would leak
+# the secret into stdout and any log that captures it.
+if api_key:
+    print('OpenAI API key loaded from the environment.')
+else:
+    print('API key not found. Please set the environment variable.')
+
+# LLM instance shared by the chains and the agent defined in this module.
+llm = OpenAI(temperature=0.7, openai_api_key = api_key)
+# --- Prompt texts -----------------------------------------------------------
+# Prompt for summarising a transcript with a fact-checking emphasis.
+summ_template = """
+You are a fact-checker and analyst. Given a text transcript, your task is to provide a concise summary of its main points while emphasizing any potential fakeness or false claims.
+
+Text Transcript: {transcript}
+
+Fact-Checker's Summary:
+"""
+
+# Prompt for summarising viewer comments.
+com_template = """
+You are a comment analyst. Given a set of comments, your task is to provide a concise summary of the main points while emphasizing any potential fakeness or false claims in the comments. Additionally, assess whether the audience appears to be informed and if they have added relevant context to their comments.
+
+Comments:
+{comments}
+
+Comment Analyst's Summary:
+"""
+
+# --- Prompt templates -------------------------------------------------------
+sum_prompt_template = PromptTemplate(
+    template=summ_template,
+    input_variables=["transcript"],
+)
+com_prompt_template = PromptTemplate(
+    template=com_template,
+    input_variables=["comments"],
+)
+
+# --- Chains -----------------------------------------------------------------
+# Each chain writes its result under its own output key.
+summary_chain = LLMChain(
+    llm=llm,
+    prompt=sum_prompt_template,
+    output_key="summary",
+)
+comment_chain = LLMChain(
+    llm=llm,
+    prompt=com_prompt_template,
+    output_key="comment_summary",
+)
+
+# Runs the summary chain, then the comment chain, and exposes both outputs.
+simplify_chain = SequentialChain(
+    chains=[summary_chain, comment_chain],
+    input_variables=["comments", "transcript"],
+    output_variables=["comment_summary", "summary"],
+    verbose=True,
+)
+
+
+
+# The agent's only tool is the DuckDuckGo search instance created at the top
+# of this module.
+tools = [search]
+agent = initialize_agent(
+    tools,
+    llm,
+    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+    verbose=True,
+)
+
+def get_Insights(transcript, comments):
+    """Run ``simplify_chain`` on one video's transcript and comments.
+
+    Args:
+        transcript: Value substituted for ``{transcript}`` in the summary prompt.
+        comments: Value substituted for ``{comments}`` in the comment prompt.
+
+    Returns:
+        Whatever calling ``simplify_chain`` with those inputs returns.
+    """
+    chain_inputs = {"comments": comments, "transcript": transcript}
+    return simplify_chain(chain_inputs)
+
+def run_agent(transcript):
+    """Ask the search-enabled agent to analyse the claims in a transcript.
+
+    Args:
+        transcript: Transcript text appended to the instructions.
+
+    Returns:
+        The value returned by ``agent.run`` for the assembled prompt.
+    """
+    instructions = (
+        "Given the provided text transcript, please perform the following tasks:\n\n"
+        "1. Identify any claims made in the text.\n"
+        "2. Conduct a search to gather insights related to the claims.\n"
+        "3. Warn about potential risks associated with the content that are not discussed in the text.\n"
+        "4. Provide an assessment of the benefits mentioned in the text.\n\n"
+        "Return the following:\n"
+        "- A list of identified claims.\n"
+        "- Insights gathered from the search.\n"
+        "- Warnings about potential risks.\n"
+        "- An assessment of benefits mentioned in the text."
+    )
+    # Separate and label the transcript; previously it was glued directly
+    # onto the last instruction sentence with no delimiter.
+    return agent.run(instructions + "\n\nText Transcript:\n" + transcript)
+
diff --git a/Fincred-verify/main.py b/Fincred-verify/main.py
@@ -0,0 +1,35 @@
+import time
+from fastapi import FastAPI
+from celery_config import celery
+from get_youtube import handle_get_youtube
+from celery.result import AsyncResult
+# import scipy
+# from celery.result import AsyncResult
+app = FastAPI()
+
+
+
+
+@app.get("/")
+async def root():
+    """Return the static welcome payload for the service root."""
+    greeting = {"message": "Welcome to FinCredVerify!"}
+    return greeting
+
+@app.get("/analyze_video/{video_id}")
+async def get_video_title(video_id):
+    """Enqueue the analysis task for a video and return immediately.
+
+    Args:
+        video_id: Video id taken from the URL path and passed to the task.
+
+    Returns:
+        A dict with the original human-readable ``message`` plus a
+        ``task_id`` field, so clients can poll ``/task_status/{task_id}``
+        without parsing the id out of the message text.
+    """
+    task = handle_get_youtube.apply_async(args=[video_id])
+
+    return {
+        "message": "Analyzing video... (Task ID: {})".format(task.id),
+        "task_id": task.id,
+    }
+@app.get("/task_status/{task_id}")
+async def get_task_status(task_id):
+    """Report the state of a previously enqueued analysis task.
+
+    Args:
+        task_id: Celery task id taken from the URL path.
+
+    Returns:
+        ``{"message": "Video analysis in progress"}`` while the task has not
+        finished; the completed message with ``result`` when it succeeded;
+        otherwise a failure message with the task ``status`` and the error
+        text.
+    """
+    result = AsyncResult(task_id, app=celery)
+    if not result.ready():
+        return {"message": "Video analysis in progress"}
+
+    if result.successful():
+        return {"message": "Video analysis completed", "result": result.result}
+
+    # ready() is also true for tasks that did not succeed. For a failed task
+    # result.result is the raised exception, which cannot be serialised to
+    # JSON, so return its text instead of the object.
+    return {
+        "message": "Video analysis failed",
+        "status": result.status,
+        "error": str(result.result),
+    }
+
+if __name__ == "__main__":
+    # Imported here so uvicorn is only loaded when this file is run directly.
+    from uvicorn import run as serve
+
+    serve(app, port=8000)
+