Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Fincred-verify/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
videos/
__pycache__/
Empty file added Fincred-verify/__init__.py
Empty file.
16 changes: 16 additions & 0 deletions Fincred-verify/celery_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

from celery import Celery
from config import settings

# Celery application shared by the web process and the worker. The broker and
# result backend URLs come from config.settings; the 'get_youtube' module is
# listed in `include` so its tasks are imported along with the app.
celery = Celery(
    'tasks',
    include=['get_youtube'],
    backend=settings.CELERY_RESULT_BACKEND,
    broker=settings.CELERY_BROKER_URL,
)

# Exchange task payloads and results as JSON only.
celery.conf.update(
    accept_content=['json'],
    result_serializer='json',
    task_serializer='json',
)
13 changes: 13 additions & 0 deletions Fincred-verify/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
from dotenv import load_dotenv


# Load variables from a .env file (if any) before Config reads os.environ.
load_dotenv()


class Config:
    """Celery connection settings, read from the environment at class creation.

    Each value falls back to a Redis URL on 127.0.0.1:6379, database 0, when
    the corresponding environment variable is not set.
    """

    CELERY_BROKER_URL: str = os.getenv(
        "CELERY_BROKER_URL", "redis://127.0.0.1:6379/0"
    )
    CELERY_RESULT_BACKEND: str = os.getenv(
        "CELERY_RESULT_BACKEND", "redis://127.0.0.1:6379/0"
    )


# Module-level settings instance that other modules import.
settings = Config()
196 changes: 196 additions & 0 deletions Fincred-verify/get_youtube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@

import openai
import requests
import asyncio
from pytube import YouTube
import subprocess
from celery_config import celery
from celery.result import AsyncResult
from celery import current_task, Celery
from test_recog import get_transcript_custom
import speech_recognition as sr
import os
# Credentials come from the environment so that secrets never live in source
# control.
# NOTE(review): the literal keys that used to be committed on these lines
# must be treated as leaked and revoked with their providers.
api_key = os.environ.get("YOUTUBE_API_KEY", "")
openai.api_key = os.environ.get("OPENAI_API_KEY")
class CaptionsError(Exception):
    """Error type for failures while fetching captions.

    The text is available both as ``str(exc)`` and as ``exc.message``.
    """

    def __init__(self, message="Error fetching captions"):
        super().__init__(message)
        self.message = message

class RatingError(Exception):
    """Error type for failures while fetching a rating.

    The text is available both as ``str(exc)`` and as ``exc.message``.
    """

    def __init__(self, message="Error fetching rating"):
        super().__init__(message)
        self.message = message
@celery.task
def get_influencer_and_channel_details(video_id):
    """Return public details of the channel that published a video.

    Two YouTube Data API calls are made: ``videos`` to resolve the channel ID
    of ``video_id``, then ``channels`` to read that channel's snippet and
    statistics.

    Args:
        video_id: YouTube video ID.

    Returns:
        A dict with ``channel_title``, ``channel_description``,
        ``channel_published_at`` and ``channel_subscriber_count`` (each an
        empty string when the API omits the field). When a lookup yields no
        items, an explanatory string is returned instead of a dict.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # Without a timeout a stalled connection would block the worker forever.
    timeout_seconds = 10

    # Step 1: resolve the channel ID. Passing values through `params` lets
    # requests URL-encode them, so a crafted ID cannot inject extra parameters.
    video_response = requests.get(
        'https://www.googleapis.com/youtube/v3/videos',
        params={'id': video_id, 'key': api_key, 'part': 'snippet'},
        timeout=timeout_seconds,
    ).json()
    if not video_response.get('items'):
        return "Invalid video link or video not found"

    channel_id = video_response['items'][0]['snippet']['channelId']

    # Step 2: fetch the channel's snippet and statistics.
    channel_response = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params={'id': channel_id, 'key': api_key, 'part': 'snippet,statistics'},
        timeout=timeout_seconds,
    ).json()
    if not channel_response.get('items'):
        return "Channel details not found"

    channel_data = channel_response['items'][0]
    channel_snippet = channel_data.get('snippet', {})
    # Tolerate a response without a statistics object instead of raising.
    channel_statistics = channel_data.get('statistics', {})

    return {
        "channel_title": channel_snippet.get('title', ''),
        "channel_description": channel_snippet.get('description', ''),
        "channel_published_at": channel_snippet.get('publishedAt', ''),
        "channel_subscriber_count": channel_statistics.get('subscriberCount', ''),
    }

def get_captions(id):
    """Return the ``trackKind`` of the first caption track listed for a video.

    Note that this yields the track's kind field, not the caption text or a
    download URL.

    NOTE(review): the YouTube Data API documents ``captions.list`` as needing
    an authorized request, so an API-key-only call may never return items --
    confirm.

    Args:
        id: YouTube video ID.

    Returns:
        The ``trackKind`` string, or None when it is empty/missing or the API
        returns no items.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # `params` URL-encodes the ID; the timeout stops a stalled call from
    # hanging the caller indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/captions",
        params={"videoId": id, "key": api_key, "part": "snippet"},
        timeout=10,
    )
    items = response.json().get("items")
    if not items:
        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
        return None

    track_kind = items[0].get('snippet', {}).get('trackKind')
    return track_kind or None

def get_comments(id):
    """Fetch the original text of a video's top-level comments.

    Only the first page of comment threads returned by the API is read; no
    pagination is performed.

    Args:
        id: YouTube video ID.

    Returns:
        A list of comment strings, or None when the API returns no items.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # `params` URL-encodes the ID; the timeout stops a stalled call from
    # hanging the caller indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/commentThreads",
        params={"videoId": id, "key": api_key, "part": "snippet"},
        timeout=10,
    )
    items = response.json().get("items")
    if not items:
        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
        return None

    return [
        item['snippet']['topLevelComment']['snippet']['textOriginal']
        for item in items
    ]

def get_title(id):
    """Fetch a video's title from the YouTube Data API.

    Args:
        id: YouTube video ID.

    Returns:
        The title string, or None when it is empty/missing or the API returns
        no items.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # `params` URL-encodes the ID; the timeout stops a stalled call from
    # hanging the caller indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos",
        params={"id": id, "key": api_key, "part": "snippet"},
        timeout=10,
    )
    items = response.json().get("items")
    if not items:
        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
        return None

    title = items[0].get('snippet', {}).get('title')
    return title or None
def get_activities(id):
    """Fetch the titles of a channel's listed activities.

    Only the first page returned by the API is read; no pagination is
    performed.

    Args:
        id: YouTube channel ID (not a video ID).

    Returns:
        A list of activity title strings, or None when the API returns no
        items.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # `params` URL-encodes the ID; the timeout stops a stalled call from
    # hanging the caller indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/activities",
        params={"channelId": id, "key": api_key, "part": "snippet"},
        timeout=10,
    )
    items = response.json().get("items")
    if not items:
        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
        return None

    return [item['snippet']['title'] for item in items]

def get_rating(id):
    """Fetch the rating recorded for a video via ``videos/getRating``.

    The documented response carries ``rating`` directly on each item (next to
    ``videoId``), not under a ``snippet`` object, and the endpoint takes no
    ``part`` parameter.

    NOTE(review): the YouTube Data API documents this endpoint as needing an
    authorized (OAuth) request, so an API-key-only call is expected to yield
    no items -- confirm before relying on it.

    Args:
        id: YouTube video ID.

    Returns:
        The rating string, or None when it is empty/missing or the API
        returns no items.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # `params` URL-encodes the ID; the timeout stops a stalled call from
    # hanging the caller indefinitely.
    response = requests.get(
        "https://www.googleapis.com/youtube/v3/videos/getRating",
        params={"id": id, "key": api_key},
        timeout=10,
    )
    items = response.json().get("items")
    if not items:
        print("Couldn't retrieve video details. Ensure the video ID is correct and you have quota on the API.")
        return None

    rating = items[0].get('rating')
    return rating or None

def download_youtube(id, task_id):
    """Download a video at 360p, extract its audio to WAV and transcribe it.

    The video is saved under ``videos/``, ffmpeg converts it to a 16-bit PCM,
    44.1 kHz, stereo WAV next to it, and that WAV is handed to
    ``get_transcript_custom``. Neither file is removed afterwards.

    Args:
        id: YouTube video ID.
        task_id: Identifier used only to tag the log output.

    Returns:
        The value returned by ``get_transcript_custom``, or None when any
        step fails (the error is printed, not raised).
    """
    print(f"Downloading YouTube video for Task ID: {task_id}")
    download_dir = "videos"
    try:
        yt = YouTube('http://youtube.com/watch?v=' + id)
        video_stream = yt.streams.filter(res="360p").first()
        if video_stream is None:
            # first() yields None when nothing matches; fail with a clear
            # message rather than an AttributeError on None.
            raise ValueError("no 360p stream available for this video")
        video_stream.download(output_path=download_dir)

        video_filename = video_stream.default_filename
        audio_filename = os.path.splitext(video_filename)[0] + ".wav"
        video_path = os.path.join(download_dir, video_filename)
        audio_path = os.path.join(download_dir, audio_filename)

        # -y overwrites a WAV left over from an earlier run instead of
        # waiting on an interactive prompt; check=True turns a failed
        # conversion into an error instead of transcribing a missing file.
        subprocess.run(
            [
                "ffmpeg", "-y", "-i", video_path,
                "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "2",
                audio_path,
            ],
            check=True,
        )
        return get_transcript_custom(audio_path)
    except Exception as e:
        # Best-effort step: report the failure and let the caller continue.
        print(f"Error in Task ID {task_id}: {e}")
        return None



@celery.task
def handle_get_youtube(id):
    """Collect metadata for a video and transcribe its audio.

    Title, comments and channel details are fetched on a best-effort basis:
    if any of those lookups raises, the error is printed and the values not
    yet fetched stay None. Captions are not fetched, so the video is always
    downloaded and transcribed; the transcript is returned under "special".

    Args:
        id: YouTube video ID.

    Returns:
        A dict with the keys "title", "comments", "captions" (always None),
        "special" (transcript, or None if transcription failed) and
        "influencer_and_channel_details".
    """
    # Bind every result up front so a failure part-way through the lookups
    # cannot leave a name undefined when the response is built.
    title = None
    comments = None
    influencer_and_channel_details = None
    captions = None

    try:
        title = get_title(id)
        comments = get_comments(id)
        influencer_and_channel_details = get_influencer_and_channel_details(id)
    except Exception as e:
        # Metadata is optional; keep going so the transcript is still produced.
        print(e)

    print("Error fetching captions, downloading video to transcribe")
    task_id = current_task.request.id
    download_result = download_youtube(id, task_id)
    print("Transcribed audio, deleting video")

    return {
        "title": title,
        "comments": comments,
        "captions": captions,
        "special": download_result,
        "influencer_and_channel_details": influencer_and_channel_details,
    }
import requests



# Example usage (the module-level api_key is used for the request):
# video_id = 'VIDEO_ID'  # the "v" value from https://www.youtube.com/watch?v=VIDEO_ID
# print(get_influencer_and_channel_details(video_id))


if __name__ == "__main__":
    # Manual smoke test. download_youtube is an ordinary synchronous function,
    # so it is called directly; passing its return value to asyncio.run()
    # raises because that value is not a coroutine.
    print(download_youtube("3VSG0S08-C0", "bd18b175-543e-4d81-a43f-5bc08cb8ca7c"))
    # print(get_captions("3VSG0S08-C0"))
    # print(get_comments("3VSG0S08-C0"))
    # print(get_title("3VSG0S08-C0"))
    # # print(get_activities("UCBR8-60-B28hp2BmDPdntcQ"))
    # print(get_rating("3VSG0S08-C0"))
73 changes: 73 additions & 0 deletions Fincred-verify/langchain-analytics/chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from langchain.tools import DuckDuckGoSearchRun
# DuckDuckGo search tool instance; it is the only entry in `tools` below.
search = DuckDuckGoSearchRun()
from langchain.prompts import PromptTemplate
from langchain.chains import SequentialChain
from langchain.chains import LLMChain
from langchain.tools import BaseTool
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain.agents import Tool
from langchain.agents import initialize_agent
import os

# The OpenAI key is read from the OPENAI_API_KEY environment variable.
api_key = os.getenv('OPENAI_API_KEY')

# Report only whether the key is present; the secret itself must never be
# written to stdout or logs.
if api_key:
    print('OpenAI API key loaded from the OPENAI_API_KEY environment variable.')
else:
    print('API key not found. Please set the environment variable.')

# Completion model used by the chains and the agent defined in this module.
llm = OpenAI(temperature=0.7, openai_api_key = api_key)
# Prompt asking for a fact-checking summary of a transcript; {transcript} is
# its only placeholder.
summ_template = """
You are a fact-checker and analyst. Given a text transcript, your task is to provide a concise summary of its main points while emphasizing any potential fakeness or false claims.

Text Transcript: {transcript}

Fact-Checker's Summary:
"""
sum_prompt_template = PromptTemplate(input_variables=["transcript"], template=summ_template)
# Chain whose output is stored under the "summary" key.
summary_chain = LLMChain(llm=llm, prompt=sum_prompt_template, output_key="summary")

# Prompt asking for a summary of viewer comments and an assessment of how
# informed the audience appears; {comments} is its only placeholder.
com_template = """
You are a comment analyst. Given a set of comments, your task is to provide a concise summary of the main points while emphasizing any potential fakeness or false claims in the comments. Additionally, assess whether the audience appears to be informed and if they have added relevant context to their comments.

Comments:
{comments}

Comment Analyst's Summary:
"""
com_prompt_template = PromptTemplate(input_variables=["comments"], template=com_template)
# Chain whose output is stored under the "comment_summary" key.
comment_chain = LLMChain(llm=llm, prompt=com_prompt_template, output_key="comment_summary")

# Combines summary_chain and comment_chain (in that order) into one chain
# that takes "comments" and "transcript" and exposes "comment_summary" and
# "summary" as outputs.
simplify_chain = SequentialChain(
chains=[summary_chain, comment_chain],
input_variables=["comments", "transcript"],
output_variables=["comment_summary", "summary"],
verbose=True)



# Agent of type ZERO_SHOT_REACT_DESCRIPTION built on `llm`, with the search
# tool as its only tool.
tools = [search]
agent = initialize_agent(tools,
llm,
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
verbose=True)

def get_Insights(transcript, comments):
    """Run simplify_chain on one video's transcript and comments.

    Args:
        transcript: Value passed to the chain as "transcript".
        comments: Value passed to the chain as "comments".

    Returns:
        Whatever simplify_chain returns for those inputs.
    """
    chain_inputs = {"comments": comments, "transcript": transcript}
    return simplify_chain(chain_inputs)

def run_agent(transcript):
    """Ask the search-enabled agent to analyse the claims made in a transcript.

    The prompt tells the agent to list the claims, search for related
    insights, warn about undiscussed risks and assess the stated benefits.

    Args:
        transcript: Transcript text (str) to analyse.

    Returns:
        The value returned by ``agent.run`` for the assembled prompt.

    Raises:
        TypeError: if ``transcript`` is not a string.
    """
    instructions = (
        "Given the provided text transcript, please perform the following tasks:\n\n"
        "1. Identify any claims made in the text.\n"
        "2. Conduct a search to gather insights related to the claims.\n"
        "3. Warn about potential risks associated with the content that are not discussed in the text.\n"
        "4. Provide an assessment of the benefits mentioned in the text.\n\n"
        "Return the following:\n"
        "- A list of identified claims.\n"
        "- Insights gathered from the search.\n"
        "- Warnings about potential risks.\n"
        "- An assessment of benefits mentioned in the text."
    )
    # Put the transcript in its own labelled section; appending it directly
    # would fuse its first words onto the last instruction sentence.
    return agent.run(instructions + "\n\nText transcript:\n" + transcript)

35 changes: 35 additions & 0 deletions Fincred-verify/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import time
from fastapi import FastAPI
from celery_config import celery
from get_youtube import handle_get_youtube
from celery.result import AsyncResult
# import scipy
# from celery.result import AsyncResult
# FastAPI application; the route decorators below register against it.
app = FastAPI()




@app.get("/")
async def root():
    """Landing endpoint: return a static welcome message."""
    greeting = {"message": "Welcome to FinCredVerify!"}
    return greeting

@app.get("/analyze_video/{video_id}")
async def get_video_title(video_id):
    """Queue background analysis of a video and report the task ID.

    Despite its name, this endpoint does not return a title: it submits
    ``handle_get_youtube`` to Celery and responds immediately.

    Args:
        video_id: YouTube video ID taken from the URL path.

    Returns:
        A dict with a human-readable "message" and the "task_id" to pass to
        the /task_status/{task_id} endpoint.
    """
    task = handle_get_youtube.apply_async(args=[video_id])

    # Expose the ID as its own field so clients need not parse the message.
    return {
        "message": "Analyzing video... (Task ID: {})".format(task.id),
        "task_id": task.id,
    }
@app.get("/task_status/{task_id}")
async def get_task_status(task_id):
    """Report the state of a previously queued analysis task.

    Args:
        task_id: Celery task ID taken from the URL path.

    Returns:
        A dict with a "message"; additionally "result" when the task
        finished successfully, or "error" (the failure rendered as text)
        when it failed.
    """
    result = AsyncResult(task_id, app=celery)
    if not result.ready():
        return {"message": "Video analysis in progress"}

    if result.failed():
        # For a failed task `result.result` is the raised exception, which is
        # neither a valid analysis result nor JSON-serialisable.
        return {"message": "Video analysis failed", "error": str(result.result)}

    return {"message": "Video analysis completed", "result": result.result}

if __name__ == "__main__":
    # Script entry point: serve the API with uvicorn on port 8000. uvicorn is
    # imported here so that importing this module does not require it.
    import uvicorn

    uvicorn.run(app=app, port=8000)

Loading