feat: add fasttokens benchmarks and tokenizer backend docs #22526
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: checks documentation links on pushes to main and on pull requests.
name: Docs link check

# Triggers: every push to the main branch, and every pull_request event.
on:
  push:
    branches: [main]
  pull_request:

# Restrict the workflow token to read access on repository contents.
permissions:
  contents: read

jobs:
# Job: check documentation links with lychee, reusing a cache of previously
# verified URLs between runs on main.
# (Belongs under the workflow's top-level `jobs:` mapping.)
lychee:
  runs-on: ubuntu-latest
  steps:
    - name: Check out repository
      uses: actions/checkout@v4

    # The key is unique per run attempt, so a restore relies on the
    # `cache-lychee-` prefix listed in restore-keys.
    - name: Restore lychee cache
      id: restore-cache
      uses: actions/cache/restore@v5
      with:
        path: .lycheecache
        key: cache-lychee-${{ github.run_id }}-${{ github.run_attempt }}
        restore-keys: cache-lychee-

    - name: Check documentation links with lychee
      uses: lycheeverse/lychee-action@v2
      with:
        # `--offline` is appended only for pull_request events; the last
        # argument (`.`) is the path that lychee scans.
        args: >-
          --cache
          --no-progress
          --max-retries 4
          --retry-wait-time 3
          --timeout 30
          --root-dir ${{ github.workspace }}
          --exclude-path ".*ATTRIBUTIONS.*"
          --exclude-path "./lib/llm/tests/data/.*"
          --accept "200..=299, 403, 429"
          --exclude-all-private
          --exclude 0.0.0.0
          --verbose
          --host-concurrency 10
          --host-request-interval 1s
          --host-stats
          ${{ github.event_name == 'pull_request' && '--offline' || '' }}
          .
        fail: true
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    # Runs even when the link check failed, but only on main, so that only
    # fresh, successful results are carried into the next run's cache.
    - name: Strip cached errors before saving
      if: always() && github.ref == 'refs/heads/main'
      run: |
        if [ -f .lycheecache ]; then
          # Rows are read as "<url>,<status>,<timestamp>". The status and
          # timestamp are addressed from the END of the row so that a comma
          # inside the URL column cannot shift them into the wrong field.
          # NOTE(review): assumes the cache has exactly these three columns.
          cutoff=$(( $(date +%s) - 93600 ))

          # Keep only rows whose status is 2xx.
          awk -F',' 'NF >= 3 && $(NF-1) ~ /^2[0-9][0-9]$/' .lycheecache > .lycheecache.tmp

          # Log and drop rows older than the cutoff (93600 s = 26 h).
          stale=$(awk -F',' -v c="$cutoff" '$NF < c' .lycheecache.tmp)
          if [ -n "$stale" ]; then
            echo "Removing stale cache entries (>26h old):"
            echo "$stale"
            awk -F',' -v c="$cutoff" '$NF >= c' .lycheecache.tmp > .lycheecache.tmp2
            mv .lycheecache.tmp2 .lycheecache.tmp
          fi
          mv .lycheecache.tmp .lycheecache
        fi

    # Including the run attempt keeps a re-run from colliding with the key
    # already saved by an earlier attempt of the same run.
    - name: Save lychee cache
      if: always() && github.ref == 'refs/heads/main'
      uses: actions/cache/save@v5
      with:
        path: .lycheecache
        key: cache-lychee-${{ github.run_id }}-${{ github.run_attempt }}
# Job: run the in-repo detect_broken_links.py script over the repository,
# print its JSON report readably, and fail the job when problems are found.
# (Belongs under the workflow's top-level `jobs:` mapping.)
broken-links-check:
  name: Check for broken markdown links
  runs-on: ubuntu-latest
  steps:
    - name: Check out repository
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Install Python dependencies
      run: |
        python -m pip install --upgrade pip
        # No additional dependencies needed for the basic script

    # Publishes the step output `broken_links_found` as true / false / error.
    - name: Run broken links check
      id: check-links
      run: |
        echo "Running broken links check on documentation files..."

        # Capture the script's exit code instead of letting a non-zero
        # status abort the step before the output is recorded.
        set +e
        python3 .github/workflows/detect_broken_links.py \
          --verbose \
          --format json \
          --check-symlinks \
          --output broken-links-report.json \
          .
        exit_code=$?
        set -e

        # Exit code 1 means broken links were found, 0 means none; any other
        # code is treated as a failure of the script itself.
        if [ "$exit_code" -eq 1 ]; then
          echo "::error::Broken links found in documentation files"
          echo "broken_links_found=true" >> "$GITHUB_OUTPUT"
        elif [ "$exit_code" -eq 0 ]; then
          echo "::notice::No broken links found"
          echo "broken_links_found=false" >> "$GITHUB_OUTPUT"
        else
          echo "::error::Script failed with exit code $exit_code"
          echo "broken_links_found=error" >> "$GITHUB_OUTPUT"
          exit "$exit_code"
        fi

    - name: Display broken links and create annotations
      if: steps.check-links.outputs.broken_links_found == 'true'
      run: |
        echo "::group::🔗 Broken Links Found"

        # Parse and display the JSON report in a readable format, plus create
        # GitHub annotations. The quoted heredoc stops the shell from
        # interpreting anything inside the Python source, and the captured
        # status lets the log group be closed even when the script fails.
        status=0
        python3 - <<'PY' || status=$?
        import json
        import sys

        CATEGORY_ICONS = {
            'broken': '💔',
            'circular': '🔄',
            'external': '🌐',
            'suspicious': '⚠️',
        }


        def escape_data(value: object) -> str:
            """Escape a workflow-command message so it stays a single command line."""
            return (
                str(value)
                .replace('%', '%25')
                .replace('\r', '%0D')
                .replace('\n', '%0A')
            )


        def escape_property(value: object) -> str:
            """Escape a workflow-command property value (e.g. file=, line=)."""
            return escape_data(value).replace(':', '%3A').replace(',', '%2C')


        try:
            with open('broken-links-report.json', encoding='utf-8') as f:
                report = json.load(f)

            summary = report['summary']
            broken_links = report['broken_links']
            problematic_symlinks = report.get('problematic_symlinks', {})
            has_broken_links = bool(broken_links)
            has_problematic_symlinks = bool(problematic_symlinks)

            if has_broken_links:
                print('❌ BROKEN LINKS DETECTED')
                print('=' * 50)
                print('📊 Summary:')
                print(f' • Total files processed: {summary["total_files_processed"]}')
                print(f' • Files with broken links: {summary["files_with_broken_links"]}')
                print(f' • Total broken links: {summary["total_broken_links"]}')
                print()

                print('🔍 Detailed Broken Links Report:')
                print('-' * 40)
                for file_path, links in broken_links.items():
                    print(f'\n📄 File: {file_path}')
                    print(f' {len(links)} broken link(s) found')
                    print()
                    for i, link in enumerate(links, 1):
                        line = link['line']
                        link_text = link['link_text']
                        link_url = link['link_url']
                        error_reason = link.get('error_reason', 'Link target not found')
                        github_url = link.get('github_url', '')

                        # Create GitHub annotation for each broken link. The
                        # values come from repository files, so they are
                        # escaped before being placed in the command.
                        annotation_msg = f'Broken link: [{link_text}]({link_url}) - {error_reason}'
                        if github_url:
                            annotation_msg += f' - View: {github_url}'
                        print(
                            f'::error file={escape_property(file_path)},'
                            f'line={escape_property(line)}::{escape_data(annotation_msg)}'
                        )

                        # Display in workflow output
                        print(f' {i}. Line {line}: [{link_text}]({link_url})')
                        print(f' ❌ {error_reason}')
                        if github_url:
                            print(f' 🔗 View on GitHub: {github_url}')
                        else:
                            print(f' 📍 Target: {link_url}')
                        print()

            if has_problematic_symlinks:
                if has_broken_links:
                    print('\n' + '=' * 50)
                print('🔗 PROBLEMATIC SYMBOLIC LINKS DETECTED')
                print('=' * 50)
                print('📊 Summary:')
                print(f' • Total problematic symlinks: {summary.get("total_problematic_symlinks", 0)}')
                print()
                for category, symlinks in problematic_symlinks.items():
                    if not symlinks:
                        continue
                    icon = CATEGORY_ICONS.get(category, '❓')
                    print(f'{icon} {category.upper()} SYMLINKS ({len(symlinks)}):')
                    print('-' * 40)
                    for i, symlink in enumerate(symlinks, 1):
                        symlink_path = symlink['symlink_path']
                        target_path = symlink['target_path']
                        issue = symlink['issue']

                        # Create GitHub annotation for each problematic symlink
                        annotation_msg = f'Problematic symlink: {issue}'
                        print(f'::error file={escape_property(symlink_path)}::{escape_data(annotation_msg)}')

                        # Display in workflow output
                        print(f' {i}. {symlink_path}')
                        print(f' → {target_path}')
                        print(f' ❌ {issue}')
                        print()

            print('=' * 50)
            print('✅ Next Steps:')
            # A running counter numbers the steps, so the list stays
            # contiguous whichever sections are printed.
            next_step = 1
            if has_broken_links:
                print('1. Check the annotations above in the Files Changed tab (for PRs)')
                print('2. Click the GitHub links to jump directly to each broken link')
                print('3. Fix all broken links before merging')
                next_step = 4
            if has_problematic_symlinks:
                print(f'{next_step}. Review and fix problematic symbolic links')
                print(f'{next_step + 1}. Consider replacing broken symlinks with actual files or fixing targets')
                print(f'{next_step + 2}. Evaluate if suspicious symlinks with many traversals are necessary')
                next_step += 3
            print(f'{next_step}. Re-run this workflow to verify fixes')
        except Exception as e:
            # Top-level boundary: report the problem and fail the step.
            print(f'❌ Error reading broken links report: {e}')
            sys.exit(1)
        PY

        echo "::endgroup::"
        exit "$status"

    # `always()` is required here: when the check step exits with a script
    # error the job is already failing, and a condition without a status
    # function would be skipped, making the "error" branch unreachable.
    - name: Alert for broken links
      if: always() && (steps.check-links.outputs.broken_links_found == 'true' || steps.check-links.outputs.broken_links_found == 'error')
      env:
        # Passed through the environment rather than interpolated into the script text.
        BROKEN_LINKS_FOUND: ${{ steps.check-links.outputs.broken_links_found }}
      run: |
        if [ "$BROKEN_LINKS_FOUND" = "error" ]; then
          echo "::error::Workflow failed due to script error"
          echo "Please check the workflow logs for details."
          exit 1
        else
          echo "::error::❌ WORKFLOW FAILED: Broken links found in documentation files"
          echo ""
          echo "🔍 What to do next:"
          echo "1. Check the 'Display broken links and create annotations' step above for details"
          echo "2. Look for ::error annotations in the Files Changed tab (for PRs)"
          echo "3. Click the GitHub URLs in the workflow output to jump directly to each broken link"
          echo "4. Fix all broken links before merging"
          echo ""
          echo "💡 Each broken link shows:"
          echo " - File name and line number where the broken link is located"
          echo " - The broken link text and target URL"
          echo " - A clickable GitHub URL to jump directly to the problem line"
          exit 1
        fi