Skip to content

Commit 19d66fc

Browse files
authored
Develop (#3)
* first commit * black code * settings added * readme completed * workflow test * workflow test * workflow test * workflow test * codecov status * readme updated * fix pytube bug * fix imports * demo video added * demo video added * demo video added * demo video removed * fix config error issue #2
1 parent da2ffc9 commit 19d66fc

File tree

5 files changed

+53
-51
lines changed

5 files changed

+53
-51
lines changed

plugin.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"name": "Youtube 2 text",
3-
"version": "0.0.1",
2+
"name": "Youtube 2 Text",
3+
"version": "0.0.2",
44
"description": "A plugin to transcribe a youtube video to text. Just paste the video link and wait for the response!",
55
"author_name": "rafleze",
66
"author_url": "https://github.com/rafleze",

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
faster-whisper==1.0.1
2-
pytube==15.0.0
2+
yt-dlp==2025.2.19
33
ffmpeg-python==0.2.0

tests/test_transcriber.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,15 @@
44
from ..transcriber import transcribe
55

66

7-
@patch("youtube2text.transcriber.YouTube")
7+
@patch("youtube2text.transcriber.download_audio")
88
@patch("youtube2text.transcriber.WhisperModel")
9-
def test_transcribe_with_settings(mock_whisper_model, mock_youtube):
9+
def test_transcribe_with_settings(mock_whisper_model, mock_download_audio):
1010
"""Test transcribe function with settings."""
1111

1212
with open("tests/media/youtube/test.mp4", "w") as f:
1313
f.write("test")
1414

15-
mock_youtube.return_value.streams.filter.return_value.first.return_value.download.return_value = (
16-
"tests/media/youtube/test.mp4"
17-
)
15+
mock_download_audio.return_value = "tests/media/youtube/test.mp4"
1816
mock_whisper_model.return_value.transcribe.return_value = [
1917
{"text": "Hello", "start": 0.0, "end": 1.0},
2018
{"text": "World", "start": 1.0, "end": 2.0},
@@ -36,16 +34,14 @@ def test_transcribe_with_settings(mock_whisper_model, mock_youtube):
3634
)
3735

3836

39-
@patch("youtube2text.transcriber.YouTube")
40-
def test_transcribe_without_settings(mock_youtube):
37+
@patch("youtube2text.transcriber.download_audio")
38+
def test_transcribe_without_settings(mock_download_audio):
4139
"""Test that transcribe raises when no settings are provided."""
4240

4341
with open("tests/media/youtube/test.mp4", "w") as f:
4442
f.write("test")
4543

46-
mock_youtube.return_value.streams.filter.return_value.first.return_value.download.return_value = (
47-
"tests/media/youtube/test.mp4"
48-
)
44+
mock_download_audio.return_value = "tests/media/youtube/test.mp4"
4945
settings = {}
5046
with pytest.raises(Exception):
5147
transcribe(
@@ -55,14 +51,12 @@ def test_transcribe_without_settings(mock_youtube):
5551
)
5652

5753

58-
@patch("youtube2text.transcriber.YouTube")
59-
def test_transcribe_with_download_exception(mock_youtube):
54+
@patch("youtube2text.transcriber.download_audio")
55+
def test_transcribe_with_download_exception(mock_download_audio):
6056
"""Test transcribe function when the audio download raises an exception."""
6157

6258
# make the mocked audio download raise an exception
63-
mock_youtube.return_value.streams.filter.return_value.first.return_value.download.side_effect = Exception(
64-
"Download failed"
65-
)
59+
mock_download_audio.side_effect = Exception("Download failed")
6660
settings = {
6761
"model_size_or_path": "large-v3",
6862
"device": "cpu",

transcriber.py

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,48 @@
11
"""Transcribe a youtube video."""
2-
3-
from pytube import YouTube
42
from faster_whisper import WhisperModel
53
import tempfile
64
import os
7-
from .custom_cipher import Cipher as CustomCipher
8-
from unittest.mock import patch
5+
import yt_dlp
96

107

11-
@patch(
12-
"pytube.extract.Cipher", CustomCipher
13-
) # Patch the Cipher class because it's not working: look at the issue https://github.com/pytube/pytube/issues/1918
148
def transcribe(link, language="en", settings={}):
159
"""Transcribe a youtube video."""
1610
if not settings:
1711
raise Exception("No configuration found for Youtube2Text.")
1812
model_size_or_path = settings.get("model_size_or_path", "large-v3")
1913
device = settings.get("device", "cpu")
2014
compute_type = settings.get("compute_type", "int8")
15+
filename = download_audio(link)
16+
audio = open(filename, "rb")
17+
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
18+
temp_file.write(audio.read())
19+
temp_file_path = temp_file.name
20+
model = WhisperModel(
21+
model_size_or_path, device=device, compute_type=compute_type
22+
)
23+
result = model.transcribe(temp_file_path, language=language)
24+
os.remove(temp_file_path)
25+
os.remove(filename)
26+
return result
27+
28+
29+
def download_audio(link, output_folder="media/youtube"): # pragma: no cover
2130
try:
22-
yt = YouTube(link)
23-
filename = yt.streams.filter(only_audio=True).first().download("media/youtube")
31+
32+
inner_output_folder = os.path.join(os.path.dirname(__file__), output_folder)
33+
os.makedirs(inner_output_folder, exist_ok=True)
34+
35+
ydl_opts = {
36+
"format": "bestaudio/best",
37+
"outtmpl": f"{inner_output_folder}/%(title)s.%(ext)s",
38+
"noprogress": False,
39+
}
40+
41+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
42+
info_dict = ydl.extract_info(link, download=True)
43+
filename = ydl.prepare_filename(info_dict)
44+
45+
return filename
46+
2447
except Exception as e:
25-
raise e
26-
else:
27-
audio = open(filename, "rb")
28-
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
29-
temp_file.write(audio.read())
30-
temp_file_path = temp_file.name
31-
model = WhisperModel(
32-
model_size_or_path, device=device, compute_type=compute_type
33-
)
34-
result = model.transcribe(temp_file_path, language=language)
35-
os.remove(temp_file_path)
36-
os.remove(filename)
37-
return result
48+
raise e

youtube_to_text.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,11 @@ class TranscriptionForm(CatForm):
5656
def submit(self, form_data):
5757
"""Submit the form."""
5858
settings = self.cat.mad_hatter.plugins["youtube2text"].load_settings()
59-
try:
60-
segments, _ = transcribe(
61-
form_data["youtube_link"], form_data["language"], settings
62-
)
63-
result = "".join([s.text for s in segments])
64-
prompt = f"Summerize the following text: {result}"
65-
summary = self.cat.llm(prompt)
66-
output = f"The transcription is: \n{result}\n\nSummary: {summary}"
67-
return {"output": output}
68-
except Exception as e:
69-
return {"output": f"An error occurred: {e}"}
59+
segments, _ = transcribe(
60+
form_data["youtube_link"], form_data["language"], settings
61+
)
62+
result = "".join([s.text for s in segments])
63+
prompt = f"Summerize the following text: {result}"
64+
summary = self.cat.llm(prompt)
65+
output = f"The transcription is: \n{result}\n\nSummary: {summary}"
66+
return {"output": output}

0 commit comments

Comments
 (0)