|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace App\Console\Commands; |
| 4 | + |
| 5 | +use DOMDocument; |
| 6 | +use DOMNode; |
| 7 | +use Illuminate\Console\Command; |
| 8 | +use League\HTMLToMarkdown\Converter\ConverterInterface; |
| 9 | +use League\HTMLToMarkdown\ElementInterface; |
| 10 | +use League\HTMLToMarkdown\HtmlConverter; |
| 11 | +use Illuminate\Support\Str; |
| 12 | +use SebastianBergmann\CodeCoverage\Report\PHP; |
| 13 | + |
/**
 * Generates `llms.txt` (a Markdown index built from the navigation component)
 * and `llms-full.txt` (every internal page linked from the navigation,
 * converted from its JSX source to Markdown).
 */
class GenerateLlmsTxt extends Command
{
    protected $signature = 'generate:llms-txt';

    protected $description = 'Generate llms.txt and llms-full.txt files';

    /** @var list<string> Internal URLs collected from the navigation, in document order. */
    protected array $pages = [];

    /** @var list<string> Markdown produced for each processed page. */
    protected array $full = [];

    /** @var array<string, string> Raw `<TabbedCode>` JSX, keyed by placeholder id. */
    protected array $tabbedCodeBlocks = [];

    /** @var array<string, string> Raw `<CodeBlock>` JSX, keyed by placeholder id. */
    protected array $codeBlocks = [];

    /** Placeholder id of the code block that is currently being captured. */
    protected string $currentCodeBlockId = '';

    /**
     * Write `llms.txt` from the navigation, then convert every collected page
     * and write the combined result to `llms-full.txt`.
     *
     * @return int Command exit code.
     */
    public function handle(): int
    {
        $this->writeNav();

        foreach ($this->pages as $page) {
            $this->writePage($page);
        }

        // Every output line is trimmed, so no leading/trailing whitespace survives.
        $contents = collect(explode(PHP_EOL, implode(PHP_EOL, $this->full)))
            ->map(static fn (string $line): string => trim($line))
            ->implode(PHP_EOL);

        file_put_contents(public_path('llms-full.txt'), $contents);

        return Command::SUCCESS;
    }

    /**
     * Convert one page component to Markdown and append it to `$this->full`.
     *
     * The JSX between `return (` and the last `)` is treated as HTML. Code
     * components (`<TabbedCode>` / `<CodeBlock>`) are swapped for placeholder
     * elements while their raw source is stashed, and a custom converter
     * renders them as fenced Markdown code blocks afterwards.
     *
     * @param string $url Page URL relative to the site root; an empty URL (the index) is skipped.
     *
     * @throws \RuntimeException When the page component cannot be read.
     */
    public function writePage(string $url): void
    {
        $url = ltrim($url, '/');

        if ($url === '') {
            return;
        }

        $path = resource_path('js/Pages/' . $url . '.jsx');
        $page = is_file($path) ? file_get_contents($path) : false;

        if ($page === false) {
            throw new \RuntimeException(sprintf('Unable to read page component for "%s" at %s.', $url, $path));
        }

        $replace = [
            '<>' => '',
            '</>' => '',
            "{' '}" => ' ',
            '<Link' => '<a',
            '</Link>' => '</a>',
            '<Notice>' => '<p>',
            '</Notice>' => '</p>',
        ];

        $codeBlockType = null;
        $inFencedCodeBlock = false;

        $page = str($page)
            ->after('return (')
            ->beforeLast(')')
            ->replace(array_keys($replace), array_values($replace))
            ->replaceMatches('/className=\{[^}]*\}/', '')
            ->explode(PHP_EOL)
            ->map(function (string $line) use (&$codeBlockType, &$inFencedCodeBlock): ?string {
                $opened = match (true) {
                    str_contains($line, '<TabbedCode') => 'tabbedcode',
                    str_contains($line, '<CodeBlock') => 'codeblock',
                    default => null,
                };

                if ($opened === null && $codeBlockType === null) {
                    // Regular markup: collapse whitespace and unwrap `{'<tag>'}` JSX string expressions.
                    return str($line)
                        ->replaceMatches('/\s+/', ' ')
                        ->replaceMatches('/\{\'<(.+)>\'\}/', '<$1>')
                        ->trim()
                        ->toString();
                }

                $placeholder = null;

                if ($opened !== null) {
                    $codeBlockType = $opened;
                    $this->currentCodeBlockId = Str::random(10);

                    if ($opened === 'tabbedcode') {
                        $this->tabbedCodeBlocks[$this->currentCodeBlockId] = '';
                    } else {
                        $this->codeBlocks[$this->currentCodeBlockId] = '';
                    }

                    // The placeholder element is resolved back to the stored source by the converter below.
                    $placeholder = sprintf('<p><%1$s>%2$s</%1$s></p>', $opened, $this->currentCodeBlockId);
                }

                // Lines are concatenated without a separator; the converter later
                // recovers line breaks from the indentation whitespace.
                if ($codeBlockType === 'tabbedcode') {
                    $this->tabbedCodeBlocks[$this->currentCodeBlockId] .= $line;
                } else {
                    $this->codeBlocks[$this->currentCodeBlockId] .= $line;
                }

                if ($this->togglesTemplateLiteral($line)) {
                    $inFencedCodeBlock = !$inFencedCodeBlock;
                }

                // A `/>` inside a template literal belongs to the sample code, not to the
                // component. The opening line is checked too, so a self-closing one-line
                // component does not swallow the rest of the page.
                if (!$inFencedCodeBlock && str_contains($line, '/>')) {
                    $codeBlockType = null;
                }

                return $placeholder;
            })
            ->filter(fn ($line) => $line !== null && trim($line) !== '')
            ->implode(PHP_EOL);

        $converter = new HtmlConverter(['header_style' => 'atx', 'hard_break' => true, 'remove_nodes' => 'div']);
        $converter->getEnvironment()->addConverter(new class($this->tabbedCodeBlocks, $this->codeBlocks) implements ConverterInterface {
            /**
             * @param array<string, string> $tabbedBlocks Raw `<TabbedCode>` source keyed by placeholder id.
             * @param array<string, string> $codeBlocks   Raw `<CodeBlock>` source keyed by placeholder id.
             */
            public function __construct(protected array $tabbedBlocks, protected array $codeBlocks)
            {
                //
            }

            /**
             * Replace a placeholder element with the fenced Markdown for the stored block.
             */
            public function convert(ElementInterface $node): string
            {
                if ($node->getTagName() === 'tabbedcode') {
                    return $this->renderTabbedCode($this->tabbedBlocks[$node->getValue()]);
                }

                return $this->renderCodeBlock($this->codeBlocks[$node->getValue()]);
            }

            public function getSupportedTags(): array
            {
                return [
                    'tabbedcode',
                    'codeblock',
                ];
            }

            /**
             * Render each entry of the `examples={...}` prop as a labelled fenced block.
             */
            private function renderTabbedCode(string $content): string
            {
                return str($content)
                    ->after('examples={')
                    ->beforeLast('}')
                    ->explode('`,')
                    ->map(function ($example) {
                        if (!str_contains($example, 'code:')) {
                            return null;
                        }

                        $language = $this->firstMatch("/language: '([^']+)'/", $example) ?? '';
                        $name = $this->firstMatch("/name: '([^']+)'/", $example) ?? '';
                        $description = $this->firstMatch("/description: '([^']+)'/m", $example);

                        $code = $this->normaliseCode(
                            str($example)->after('dedent`')->beforeLast('`,')->toString()
                        );

                        $label = $name . ($description !== null ? ' (' . $description . ')' : '');
                        $heading = $label === '' ? '' : $label . ":\n\n";

                        return $heading . sprintf("```%s\n%s\n```", $language, $code);
                    })
                    ->filter()
                    ->implode(PHP_EOL . PHP_EOL);
            }

            /**
             * Render a single `<CodeBlock language="..." ...dedent`...`>` as a fenced block.
             */
            private function renderCodeBlock(string $content): string
            {
                $language = $this->firstMatch('/language="([^"]+)"/', $content) ?? '';
                $code = $this->normaliseCode($this->firstMatch('/dedent`([^`]+)`/', $content) ?? '');

                return sprintf("```%s\n%s\n```", $language, $code);
            }

            /**
             * Return the first capture group of `$pattern`, or null when it does not match.
             */
            private function firstMatch(string $pattern, string $subject): ?string
            {
                return preg_match($pattern, $subject, $matches) === 1 ? $matches[1] : null;
            }

            /**
             * Captured lines were joined without separators, so a run of seven or
             * more whitespace characters is turned back into a line break.
             */
            private function normaliseCode(string $code): string
            {
                return str($code)->trim()->replaceMatches('/\s{7,}/', PHP_EOL)->toString();
            }
        });

        $markdown = $converter->convert($page);

        $this->full[] = $markdown;
    }

    /**
     * Build `llms.txt` from the navigation component and collect the internal
     * page URLs into `$this->pages`.
     *
     * Internal links (href not starting with `http`) become Markdown list
     * items; `<div>` elements encountered once a `<ul>` has been seen become
     * `##` section headings.
     *
     * @return int Command exit code.
     *
     * @throws \RuntimeException When the navigation component cannot be read.
     */
    public function writeNav(): int
    {
        $path = resource_path('js/Components/Nav.jsx');
        $nav = is_file($path) ? file_get_contents($path) : false;

        if ($nav === false) {
            throw new \RuntimeException(sprintf('Unable to read navigation component at %s.', $path));
        }

        $nav = str($nav)
            ->afterLast('return (')
            ->beforeLast(')')
            ->replaceMatches('/className=\{[^}]*\}/', '')
            ->replace(['<Link', '</Link>'], ['<a', '</a>'])
            ->toString();

        $doc = new DOMDocument();

        // JSX is not valid HTML, so parser warnings are expected; silence them
        // for this call only and restore the caller's libxml setting afterwards.
        $previousSetting = libxml_use_internal_errors(true);

        try {
            // The encoding hint stops libxml from decoding the markup as ISO-8859-1.
            $doc->loadHTML('<?xml encoding="UTF-8">' . $nav);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        $md = [];

        $inList = false;

        $walk = function (DOMNode $node) use (&$walk, &$md, &$inList): void {
            if ($node->nodeType === XML_ELEMENT_NODE) {
                if ($node->nodeName === 'ul') {
                    $inList = true;
                }

                if ($node->nodeName === 'a') {
                    // Anchors without an href carry nothing to link to and are skipped.
                    $url = $node->attributes?->getNamedItem('href')?->nodeValue;

                    if ($url !== null && !str_starts_with($url, 'http')) {
                        $this->pages[] = $url;
                        $md[] = sprintf('- [%s](%s)', trim($node->textContent), url($url));
                    }
                }

                if ($inList && $node->nodeName === 'div') {
                    $md[] = '';
                    $md[] = '## ' . trim($node->textContent);
                }
            }

            foreach ($node->childNodes as $child) {
                $walk($child);
            }
        };

        if ($doc->documentElement !== null) {
            $walk($doc->documentElement);
        }

        file_put_contents(public_path('llms.txt'), implode(PHP_EOL, $md));

        return Command::SUCCESS;
    }

    /**
     * Whether the line leaves a JS template literal open or closes one, i.e.
     * contains an odd number of backticks that are not backslash-escaped.
     */
    private function togglesTemplateLiteral(string $line): bool
    {
        return preg_match_all('/(?<!\\\\)`/', $line) % 2 === 1;
    }
}
0 commit comments