Skip to content

Commit 1527a44

Browse files
committed
bug #48 fix: youtube transcript auth issue by using ready-made lib (chr-hertel)
This PR was merged into the main branch.

Discussion
----------

fix: youtube transcript auth issue by using ready-made lib

| Q             | A   |
| ------------- | --- |
| Bug fix?      | yes |
| New feature?  | no  |
| Docs?         | no  |
| Issues        |     |
| License       | MIT |

Cherry picking php-llm/llm-chain#353

Commits
-------

8314df4 fix: youtube transcript auth issue by using ready-made lib (#353)
2 parents d5a0b5c + 8314df4 commit 1527a44

File tree

3 files changed

+18
-44
lines changed

3 files changed

+18
-44
lines changed

examples/composer.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
"async-aws/bedrock-runtime": "^1.1",
99
"codewithkyrian/transformers": "^0.5.3",
1010
"doctrine/dbal": "^3.3|^4.0",
11+
"php-http/discovery": "^1.20",
1112
"probots-io/pinecone-php": "^1.1",
13+
"psr/http-factory-implementation": "*",
1214
"symfony/ai-agent": "@dev",
1315
"symfony/ai-platform": "@dev",
1416
"symfony/ai-store": "@dev",
@@ -34,7 +36,8 @@
3436
],
3537
"config": {
3638
"allow-plugins": {
37-
"codewithkyrian/transformers-libsloader": true
39+
"codewithkyrian/transformers-libsloader": true,
40+
"php-http/discovery": true
3841
}
3942
}
4043
}

src/agent/composer.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
"symfony/type-info": "^7.2.3"
3434
},
3535
"require-dev": {
36+
"mrmysql/youtube-transcript": "^v0.0.5",
3637
"phpstan/phpstan": "^2.0",
3738
"phpunit/phpunit": "^11.5.13",
3839
"symfony/ai-store": "@dev",
@@ -41,9 +42,9 @@
4142
"symfony/event-dispatcher": "^6.4 || ^7.1"
4243
},
4344
"suggest": {
45+
"mrmysql/youtube-transcript": "For using the YouTube transcription tool.",
4446
"symfony/ai-store": "For using Similarity Search with a vector store.",
45-
"symfony/css-selector": "For using the YouTube transcription tool.",
46-
"symfony/dom-crawler": "For using the YouTube transcription tool."
47+
"symfony/dom-crawler": "For using the Crawler tool."
4748
},
4849
"config": {
4950
"sort-packages": true

src/agent/src/Toolbox/Tool/YouTubeTranscriber.php

Lines changed: 11 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@
1111

1212
namespace Symfony\AI\Agent\Toolbox\Tool;
1313

14+
use MrMySQL\YoutubeTranscript\TranscriptListFetcher;
1415
use Symfony\AI\Agent\Exception\LogicException;
15-
use Symfony\AI\Agent\Exception\RuntimeException;
1616
use Symfony\AI\Agent\Toolbox\Attribute\AsTool;
17-
use Symfony\Component\CssSelector\CssSelectorConverter;
18-
use Symfony\Component\DomCrawler\Crawler;
17+
use Symfony\Component\HttpClient\Psr18Client;
1918
use Symfony\Contracts\HttpClient\HttpClientInterface;
2019

2120
/**
@@ -27,11 +26,8 @@
2726
public function __construct(
2827
private HttpClientInterface $client,
2928
) {
30-
if (!class_exists(Crawler::class)) {
31-
throw new LogicException('The Symfony DomCrawler component is required to use this tool. Try running "composer require symfony/dom-crawler".');
32-
}
33-
if (!class_exists(CssSelectorConverter::class)) {
34-
throw new LogicException('The Symfony CSS Selector component is required to use this tool. Try running "composer require symfony/css-selector".');
29+
if (!class_exists(TranscriptListFetcher::class)) {
30+
throw new LogicException('The package `mrmysql/youtube-transcript` is required to use this tool. Try running "composer require mrmysql/youtube-transcript".');
3531
}
3632
}
3733

@@ -40,40 +36,14 @@ public function __construct(
4036
*/
4137
public function __invoke(string $videoId): string
4238
{
43-
// Fetch the HTML content of the YouTube video page
44-
$htmlResponse = $this->client->request('GET', 'https://youtube.com/watch?v='.$videoId);
45-
$html = $htmlResponse->getContent();
46-
47-
// Use DomCrawler to parse the HTML
48-
$crawler = new Crawler($html);
49-
50-
// Extract the script containing the ytInitialPlayerResponse
51-
$scriptContent = $crawler->filter('script')->reduce(function (Crawler $node) {
52-
return str_contains($node->text(), 'var ytInitialPlayerResponse = {');
53-
})->text();
54-
55-
// Extract and parse the JSON data from the script
56-
$start = strpos($scriptContent, 'var ytInitialPlayerResponse = ') + \strlen('var ytInitialPlayerResponse = ');
57-
$dataString = substr($scriptContent, $start);
58-
$dataString = substr($dataString, 0, strrpos($dataString, ';') ?: null);
59-
$data = json_decode(trim($dataString), true);
60-
61-
// Extract the URL for the captions
62-
if (!isset($data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'])) {
63-
throw new RuntimeException('Captions are not available for this video.');
64-
}
65-
$captionsUrl = $data['captions']['playerCaptionsTracklistRenderer']['captionTracks'][0]['baseUrl'];
66-
67-
// Fetch and parse the captions XML
68-
$xmlResponse = $this->client->request('GET', $captionsUrl);
69-
$xmlContent = $xmlResponse->getContent();
70-
$xmlCrawler = new Crawler($xmlContent);
39+
$psr18Client = new Psr18Client($this->client);
40+
$fetcher = new TranscriptListFetcher($psr18Client, $psr18Client, $psr18Client);
7141

72-
// Collect all text elements from the captions
73-
$transcript = $xmlCrawler->filter('text')->each(function (Crawler $node) {
74-
return $node->text().' YouTubeTranscriber.php';
75-
});
42+
$list = $fetcher->fetch($videoId);
43+
$transcript = $list->findTranscript($list->getAvailableLanguageCodes());
7644

77-
return implode(\PHP_EOL, $transcript);
45+
return array_reduce($transcript->fetch(), function (string $carry, array $item): string {
46+
return $carry.\PHP_EOL.$item['text'];
47+
}, '');
7848
}
7949
}

0 commit comments

Comments
 (0)