diff --git a/.github/workflows/templates/tinytex-comment-body.md b/.github/workflows/templates/tinytex-comment-body.md new file mode 100644 index 0000000000..e36410e484 --- /dev/null +++ b/.github/workflows/templates/tinytex-comment-body.md @@ -0,0 +1,12 @@ +## Update: {{DATE}} + +New pattern changes detected. + +<details>
+<summary>Click to expand diff</summary> + +```diff +{{DIFF}} +``` + +</details>
diff --git a/.github/workflows/templates/tinytex-issue-body.md b/.github/workflows/templates/tinytex-issue-body.md new file mode 100644 index 0000000000..5dde77d3b7 --- /dev/null +++ b/.github/workflows/templates/tinytex-issue-body.md @@ -0,0 +1,32 @@ +## TinyTeX Pattern Update: {{DATE}} + +The daily TinyTeX regex patterns have changed and need review. + +### Pattern Diff + +<details>
+<summary>Click to expand diff</summary> + +```diff +{{DIFF}} +``` + +</details>
+
+### Next Steps
+
+See [dev-docs/tinytex-pattern-maintenance.md](./dev-docs/tinytex-pattern-maintenance.md) for detailed instructions.
+
+**Review checklist:**
+
+- [ ] Review diff for significant changes
+- [ ] Determine if patterns need adaptation
+- [ ] Update `parse-error.ts` if needed
+- [ ] Add/update filter functions
+- [ ] Run tests: `unit\latexmk\parse-error.test.ts`
+- [ ] Add test cases for new patterns if needed
+- [ ] Close this issue when complete
+
+---
+
+_Generated by [verify-tinytex-patterns.yml](./.github/workflows/verify-tinytex-patterns.yml)_
diff --git a/.github/workflows/verify-tinytex-patterns.yml b/.github/workflows/verify-tinytex-patterns.yml
new file mode 100644
index 0000000000..29fb1359c8
--- /dev/null
+++ b/.github/workflows/verify-tinytex-patterns.yml
@@ -0,0 +1,197 @@
+name: Verify TinyTeX Pattern Coverage
+
+on:
+  schedule:
+    - cron: '0 2 * * *' # Daily 2am UTC (matches TinyTeX daily release)
+  workflow_dispatch: # Manual trigger for testing
+
+permissions:
+  contents: read
+  issues: write
+  actions: write
+
+jobs:
+  verify:
+    name: Check TinyTeX Pattern Updates
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # A missing daily release is tolerated (warning, not failure). `exit 0`
+      # only ends this step, so the `available` output is what lets the
+      # later steps skip themselves when there is no regex.json to compare.
+      - name: Download and extract regex.json
+        id: download
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          if ! gh release download daily --repo rstudio/tinytex-releases --pattern "regex.tar.gz"; then
+            echo "::warning::Failed to download TinyTeX daily release - may not be published yet"
+            echo "available=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+          tar -xzf regex.tar.gz
+          if [ ! -f regex.json ]; then
+            echo "::error::regex.tar.gz did not contain regex.json"
+            exit 1
+          fi
+          echo "available=true" >> "$GITHUB_OUTPUT"
+          echo "✓ Downloaded and extracted regex.json"
+
+      - name: Restore cached regex.json
+        id: cache-restore
+        if: steps.download.outputs.available == 'true'
+        uses: actions/cache/restore@v4
+        with:
+          path: .cache/regex.json
+          key: tinytex-regex-latest
+
+      # Sets `changed` and `first_run`. When this step is skipped both outputs
+      # are empty, so every step below that tests them is skipped as well.
+      - name: Compare versions
+        id: compare
+        if: steps.download.outputs.available == 'true'
+        run: |
+          if [ -f .cache/regex.json ]; then
+            if git diff --no-index --quiet .cache/regex.json regex.json; then
+              echo "changed=false" >> "$GITHUB_OUTPUT"
+              echo "first_run=false" >> "$GITHUB_OUTPUT"
+              echo "✓ No changes detected"
+            else
+              echo "changed=true" >> "$GITHUB_OUTPUT"
+              echo "first_run=false" >> "$GITHUB_OUTPUT"
+              echo "✗ Changes detected"
+            fi
+          else
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+            echo "first_run=true" >> "$GITHUB_OUTPUT"
+            echo "⚠ No cached version (first run)"
+          fi
+
+      - name: Handle first run
+        if: steps.compare.outputs.first_run == 'true'
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          # Get tinytex commit SHA
+          TINYTEX_COMMIT=$(gh api repos/rstudio/tinytex/commits/main --jq '.sha')
+          TINYTEX_SHORT=$(echo "$TINYTEX_COMMIT" | cut -c1-7)
+
+          # Count patterns and categories
+          PATTERN_COUNT=$(jq '[.[] | length] | add' regex.json)
+          CATEGORY_COUNT=$(jq 'keys | length' regex.json)
+
+          # Write GitHub Actions summary
+          echo "## TinyTeX Pattern Baseline Established" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Date:** $(date +%Y-%m-%d)" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **TinyTeX commit:** [\`$TINYTEX_SHORT\`](https://github.com/rstudio/tinytex/commit/$TINYTEX_COMMIT)" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Pattern source:** [R/latex.R](https://github.com/rstudio/tinytex/blob/$TINYTEX_COMMIT/R/latex.R)" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Baseline:** $PATTERN_COUNT patterns across $CATEGORY_COUNT categories" >> "$GITHUB_STEP_SUMMARY"
+          echo "- **Cache key:** tinytex-regex-latest" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "No issue created (first run - baseline established)." >> "$GITHUB_STEP_SUMMARY"
+
+          # Prepare cache directory
+          mkdir -p .cache
+          cp regex.json .cache/regex.json
+
+          echo "✓ Baseline established - cache will be saved"
+
+      - name: Exit if unchanged
+        if: steps.compare.outputs.changed == 'false' && steps.compare.outputs.first_run == 'false'
+        run: |
+          echo "No pattern changes detected. Cache hit - exiting."
+          exit 0
+
+      - name: Prepare readable diff
+        if: steps.compare.outputs.changed == 'true'
+        run: |
+          # Pretty-print both JSON files for readable diff
+          if [ -f .cache/regex.json ]; then
+            jq --sort-keys . .cache/regex.json > old-formatted.json
+            jq --sort-keys . regex.json > new-formatted.json
+            git diff --no-index old-formatted.json new-formatted.json > readable-diff.txt || true
+          else
+            jq --sort-keys . regex.json > new-formatted.json
+            echo "First run - no previous version to compare" > readable-diff.txt
+          fi
+
+      - name: Create or update issue
+        if: steps.compare.outputs.changed == 'true'
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          ISSUE_TITLE="TinyTeX patterns require review"
+          CURRENT_DATE=$(date +%Y-%m-%d)
+
+          # Search for existing open issue; keep only the first match so that
+          # duplicate issues cannot turn ISSUE_NUM into a multi-line value
+          ISSUE_NUM=$(gh issue list \
+            --label "tinytex-patterns" \
+            --state open \
+            --json number,title \
+            --jq "map(select(.title == \"$ISSUE_TITLE\")) | .[0].number // empty")
+
+          if [ -z "$ISSUE_NUM" ]; then
+            echo "No matching issue found, creating new one..."
+
+            # Use template and replace placeholders
+            sed "s|{{DATE}}|$CURRENT_DATE|g" .github/workflows/templates/tinytex-issue-body.md | \
+              sed -e "/{{DIFF}}/r readable-diff.txt" -e "/{{DIFF}}/d" > issue-body.md
+
+            gh issue create \
+              --title "$ISSUE_TITLE" \
+              --assignee cderv \
+              --label "tinytex-patterns" \
+              --body-file issue-body.md
+          else
+            echo "Found existing issue #$ISSUE_NUM, adding comment..."
+
+            # Use template and replace placeholders
+            sed "s|{{DATE}}|$CURRENT_DATE|g" .github/workflows/templates/tinytex-comment-body.md | \
+              sed -e "/{{DIFF}}/r readable-diff.txt" -e "/{{DIFF}}/d" > comment-body.md
+
+            gh issue comment "$ISSUE_NUM" --body-file comment-body.md
+          fi
+
+      - name: Update cache with new patterns
+        if: steps.compare.outputs.changed == 'true'
+        run: |
+          mkdir -p .cache
+          cp regex.json .cache/regex.json
+          echo "✓ Cache updated with new patterns"
+
+      - name: Delete old cache
+        if: steps.compare.outputs.changed == 'true'
+        continue-on-error: true
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          gh cache delete tinytex-regex-latest || echo "No existing cache to delete"
+
+      - name: Save new cache
+        if: steps.compare.outputs.changed == 'true' || steps.compare.outputs.first_run == 'true'
+        uses: actions/cache/save@v4
+        with:
+          path: .cache/regex.json
+          key: tinytex-regex-latest
+
+      - name: Summary
+        if: always()
+        env:
+          DOWNLOADED: ${{ steps.download.outputs.available }}
+          FIRST_RUN: ${{ steps.compare.outputs.first_run }}
+          CHANGED: ${{ steps.compare.outputs.changed }}
+        run: |
+          if [ "$DOWNLOADED" != "true" ]; then
+            echo "⚠ regex.json was not downloaded - comparison skipped"
+          elif [ "$FIRST_RUN" == "true" ]; then
+            echo "✓ Baseline established - cache created"
+          elif [ "$CHANGED" == "true" ]; then
+            echo "✗ Pattern changes detected - issue created/updated"
+          else
+            echo "✓ No pattern changes - cache hit"
+          fi
diff --git a/.gitignore b/.gitignore
index 918ce8d503..a4012725b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ package/dist/**
 /tests/test-out.json
 *~
 .env
+.private-journal/
 # deno_std library
 src/resources/deno_std/cache
 src/vendor-*
diff --git a/dev-docs/tinytex-pattern-maintenance.md b/dev-docs/tinytex-pattern-maintenance.md
new file mode 100644
index 0000000000..adf20b1c15
--- /dev/null
+++ b/dev-docs/tinytex-pattern-maintenance.md
@@ -0,0 +1,118 @@
+# Quarto LaTeX engine Pattern Maintenance
+
+Quarto tracks **tinytex** R package's LaTeX error detection patterns to provide helpful diagnostics when LaTeX compilation fails. This document describes the automated verification process and manual adaptation workflow.
+ +## Overview + +The R package **tinytex** maintains a comprehensive database of LaTeX error patterns in its error-parsing logic, and exports them as `regex.json` in its daily release. It can detect missing packages and fonts. We track these patterns because: + +- TinyTeX is the distribution maintained by the Posit team (@yihui and @cderv), who actively maintain the patterns based on user reports +- It is used by Quarto (`quarto install tinytex`) +- Every problem will be fixed in the R package first +- Low update frequency (~4 changes/year) makes manual adaptation practical + +**Our process:** + +- Daily automated check detects when TinyTeX patterns change +- GitHub issue created/updated when changes detected +- Manual review and adaptation for Quarto's usage + +## Pattern Differences + +The tinytex R package and the Quarto LaTeX engine use patterns differently: + +- R package: Matches patterns line-by-line against the log array +- Quarto: Matches patterns against the entire log file as a string + +### Common Adaptations + +1. **Direct copy** (most common): + + ```typescript + // TinyTeX: ".*! LaTeX Error: File [`']([^']+)' not found.*" + // Quarto: + /.*! LaTeX Error: File [`']([^']+)' not found.*/g; + ``` + +2. **Anchored patterns** need multiline flag or anchor removal: + + ```typescript + // TinyTeX: "^No file ([^`'. ]+[.]fd)[.].*" + // Quarto options: + /^No file ([^`'. ]+[.]fd)[.].*/gm // multiline flag + /.*No file ([^`'. ]+[.]fd)[.].*/g // remove anchor + ``` + +3. **Filter functions** for post-processing: + + ```typescript + { + regex: /.*! Font [^=]+=([^ ]+).+ not loadable.*/g, + filter: formatFontFilter, // Cleans font names + } + ``` + +## Manual Adaptation Process + +When the automated workflow detects TinyTeX pattern changes, it creates/updates a GitHub issue with: + +- Date of detection +- Category-by-category count changes +- Full diff of `regex.json` changes + +### Adaptation Steps + +1. Review the diff: + + - Identify added, modified, or removed patterns + +2. 
Update [parse-error.ts](../src/command/render/latexmk/parse-error.ts): + + - Add new patterns to `packageMatchers` array + - Convert TinyTeX string patterns to TypeScript regex with `/g` flag + - Add multiline flag `/gm` if pattern uses `^` or `$` anchors + - Add filter function if pattern needs post-processing + +3. Test changes + + ```bash + cd tests + # Windows + pwsh -Command '$env:QUARTO_TESTS_NO_CONFIG="true"; .\run-tests.ps1 unit\latexmk\parse-error.test.ts' + # Linux/macOS + QUARTO_TESTS_NO_CONFIG=true ./run-tests.sh unit/latexmk/parse-error.test.ts + ``` + +4. Commit and close issue + +## Verification Workflow + +The automated workflow runs daily: + +1. Downloads `regex.tar.gz` from [TinyTeX releases](https://github.com/rstudio/tinytex-releases) +2. Extracts and compares `regex.json` with cached version +3. If changed: generates diff and creates/updates issue +4. If unchanged: exits early (no notification) + +**Workflow location**: [.github/workflows/verify-tinytex-patterns.yml](../.github/workflows/verify-tinytex-patterns.yml) + +**Manual trigger**: Run workflow from GitHub Actions tab when testing or after TinyTeX release announcement + +## Current Coverage + +**Pattern implementation:** 22 of 23 patterns from TinyTeX (96%) + +**Not implemented:** +- `l3backend` pattern for LaTeX3 version mismatch detection +- Reason: Complex context-aware logic required, rare error case + +**Test coverage:** All documented TinyTeX error examples are tested + +**Important:** Patterns should support both backtick (`` ` ``) and single quote (`'`) for LaTeX error messages + +## Resources + +- [parse-error.ts](../src/command/render/latexmk/parse-error.ts) - Pattern implementation +- [parse-error.test.ts](../tests/unit/latexmk/parse-error.test.ts) - Unit tests +- [TinyTeX R source](https://github.com/rstudio/tinytex/blob/main/R/latex.R) - How patterns are used in R +- [TinyTeX releases](https://github.com/rstudio/tinytex-releases) - Source of regex.json diff --git 
a/src/command/render/latexmk/parse-error.ts b/src/command/render/latexmk/parse-error.ts index 6b7d64f2c3..ee99c76330 100644 --- a/src/command/render/latexmk/parse-error.ts +++ b/src/command/render/latexmk/parse-error.ts @@ -248,7 +248,7 @@ const packageMatchers = [ return `${match}.sty`; }, }, - { regex: /.* File `(.+eps-converted-to.pdf)'.*/g, filter: estoPdfFilter }, + { regex: /.* File [`'](.+eps-converted-to.pdf)'.*/g, filter: estoPdfFilter }, { regex: /.*xdvipdfmx:fatal: pdf_ref_obj.*/g, filter: estoPdfFilter }, { @@ -267,15 +267,27 @@ const packageMatchers = [ return "lua-uni-algos.lua"; }, }, + { + regex: /.* Package pdfx Error: No color profile ([^\s]*).*/g, + filter: (_match: string, _text: string) => { + return "colorprofiles.sty"; + }, + }, + { + regex: /.*No file ([^`'. ]+[.]fd)[.].*/g, + filter: (match: string, _text: string) => { + return match.toLowerCase(); + }, + }, { regex: /.* Loading '([^']+)' aborted!.*/g }, - { regex: /.*! LaTeX Error: File `([^']+)' not found.*/g }, + { regex: /.*! LaTeX Error: File [`']([^']+)' not found.*/g }, { regex: /.* [fF]ile ['`]?([^' ]+)'? not found.*/g }, { regex: /.*the language definition file ([^\s]*).*/g }, { regex: /.* \\(file ([^)]+)\\): cannot open .*/g }, - { regex: /.*file `([^']+)' .*is missing.*/g }, - { regex: /.*! CTeX fontset `([^']+)' is unavailable.*/g }, + { regex: /.*file [`']([^']+)' .*is missing.*/g }, + { regex: /.*! CTeX fontset [`']([^']+)' is unavailable.*/g }, { regex: /.*: ([^:]+): command not found.*/g }, - { regex: /.*! I can't find file `([^']+)'.*/g }, + { regex: /.*! 
I can't find file [`']([^']+)'.*/g }, ]; function fontSearchTerm(font: string): string { diff --git a/src/core/zip.ts b/src/core/zip.ts index 33cdeeeb5e..b9bc8170c9 100644 --- a/src/core/zip.ts +++ b/src/core/zip.ts @@ -40,8 +40,18 @@ export function unzip(file: string, dir?: string) { } } else { // use the tar command to untar this + // On Windows, prefer System32 tar to avoid Git Bash tar path issues + let tarCmd = "tar"; + if (isWindows) { + const systemRoot = Deno.env.get("SystemRoot") || "C:\\Windows"; + const system32Tar = `${systemRoot}\\System32\\tar.exe`; + if (existsSync(system32Tar)) { + tarCmd = system32Tar; + } + // Otherwise fall back to "tar" in PATH + } return execProcess( - { cmd: "tar", args: ["xfz", file], cwd: dir, stdout: "piped" }, + { cmd: tarCmd, args: ["xfz", file], cwd: dir, stdout: "piped" }, ); } } diff --git a/tests/unit/latexmk/parse-error.test.ts b/tests/unit/latexmk/parse-error.test.ts index 2a80106b8e..3a34e4f8f3 100644 --- a/tests/unit/latexmk/parse-error.test.ts +++ b/tests/unit/latexmk/parse-error.test.ts @@ -33,6 +33,7 @@ unitTest("Detect missing files with `findMissingFontsAndPackages`", async () => assertFound('(fontspec) The font "LibertinusSerif-Regular" cannot be', fontSearchTerm("LibertinusSerif-Regular")); assertFound('! Font \\JY3/mc/m/n/10=file:HaranoAjiMincho-Regular.otf:-kern;jfm=ujis at 9.24713pt not loadable: metric data not found or bad.', "HaranoAjiMincho-Regular.otf"); assertFound("! LaTeX Error: File `framed.sty' not found.", "framed.sty"); + assertFound("! LaTeX Error: File 'framed.sty' not found.", "framed.sty"); assertFound("/usr/local/bin/mktexpk: line 123: mf: command not found", "mf"); assertFound("or the language definition file ngerman.ldf was not found", "ngerman.ldf"); assertFound(`Package babel Error: Unknown option 'ngerman'. 
Either you misspelled it @@ -43,12 +44,30 @@ unitTest("Detect missing files with `findMissingFontsAndPackages`", async () => (babel) to the babel package options.`, "ngerman.ldf") assertFound("!pdfTeX error: pdflatex (file 8r.enc): cannot open encoding file for reading", "8r.enc"); assertFound("! CTeX fontset `fandol' is unavailable in current mode", "fandol"); + assertFound("! CTeX fontset 'fandol' is unavailable in current mode", "fandol"); assertFound('Package widetext error: Install the flushend package which is a part of sttools', "flushend.sty"); assertFound('! Package isodate.sty Error: Package file substr.sty not found.', "substr.sty"); assertFound("! Package fontenc Error: Encoding file `t2aenc.def' not found.", "t2aenc.def"); assertFound("! I can't find file `hyph-de-1901.ec.tex'.", "hyph-de-1901.ec.tex"); + assertFound("! I can't find file 'hyph-de-1901.ec.tex'.", "hyph-de-1901.ec.tex"); assertFound("luaotfload-features.lua:835: module 'lua-uni-normalize' not found:", "lua-uni-algos.lua"); assertFound("! LuaTeX-ja error: File 'jfm-zh_CN.lua' not found.", "jfm-zh_CN.lua"); + + // Additional test cases from tinytex R package examples (latex.R lines 537-607) + // https://github.com/rstudio/tinytex/blob/e96be3143b9af07768a124215b5fb5a1e6d183d3/R/latex.R#L538-L558 + assertFound('xdvipdfmx:fatal: Unable to find TFM file "rsfs10"', fontSearchTerm("rsfs10")); + assertFound("Package biblatex Info: ... file 'trad-abbrv.bbx' not found", "trad-abbrv.bbx"); + assertFound("! Package pdftex.def Error: File `logo-mdpi-eps-converted-to.pdf' not found", "epstopdf"); + assertFound("! Package pdftex.def Error: File 'logo-mdpi-eps-converted-to.pdf' not found", "epstopdf"); + assertFound("! xdvipdfmx:fatal: pdf_ref_obj(): passed invalid object.", "epstopdf"); + assertFound( + "! Package tikz Error: I did not find the tikz library 'hobby'. This error message was issued because the library or one of its sublibraries could not be found, probably because of a misspelling. 
Processed options: \"library={hobby}\". The possibly misspelled library name is \"hobby\". The library name should be one of the following (or you misspelled it): named tikzlibraryhobby.code.tex", + "tikzlibraryhobby.code.tex" + ); + assertFound("support file `supp-pdf.mkii' (supp-pdf.tex) is missing", "supp-pdf.mkii"); + assertFound("support file 'supp-pdf.mkii' (supp-pdf.tex) is missing", "supp-pdf.mkii"); + assertFound("! Package pdfx Error: No color profile sRGB_IEC61966-2-1_black_scaled.icc found", "colorprofiles.sty"); + assertFound("No file LGRcmr.fd. ! LaTeX Error: This NFSS system isn't set up properly.", "lgrcmr.fd"); },{ cwd: () => "unit/latexmk/" }) \ No newline at end of file