Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 55 additions & 2 deletions code_review_graph/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
import hashlib
import json
import logging
import os
import re
import subprocess
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import NamedTuple, Optional

import tree_sitter_language_pack as tslp

from .tsconfig_resolver import TsconfigResolver


Expand Down Expand Up @@ -43,6 +44,49 @@ class CellInfo(NamedTuple):

logger = logging.getLogger(__name__)

_DEFAULT_PARSER_LOAD_TIMEOUT_SECONDS = 5.0


def _parse_parser_load_timeout(
    raw: Optional[str],
    default: float = _DEFAULT_PARSER_LOAD_TIMEOUT_SECONDS,
) -> float:
    """Convert a raw ``CRG_PARSER_LOAD_TIMEOUT_SECONDS`` value to seconds.

    A misconfigured environment variable must not break importing this
    module, so anything that is not a positive, finite number falls back to
    *default* (with a warning) instead of raising.

    Args:
        raw: The environment variable's value, or ``None`` when it is unset.
        default: Timeout to use when *raw* is missing or unusable.

    Returns:
        The timeout in seconds as a positive, finite float.
    """
    if raw is None or not raw.strip():
        return default
    try:
        seconds = float(raw)
    except ValueError:
        logger.warning(
            "Ignoring invalid CRG_PARSER_LOAD_TIMEOUT_SECONDS=%r; using %s seconds",
            raw,
            default,
        )
        return default
    # NaN fails every comparison, so this single check rejects NaN, infinity
    # and non-positive values, none of which is a usable subprocess timeout.
    if not 0 < seconds < float("inf"):
        logger.warning(
            "Ignoring out-of-range CRG_PARSER_LOAD_TIMEOUT_SECONDS=%r; using %s seconds",
            raw,
            default,
        )
        return default
    return seconds


# Default time budget, in seconds, for the child-process parser load probe.
# Overridable through the CRG_PARSER_LOAD_TIMEOUT_SECONDS environment variable.
_PARSER_LOAD_TIMEOUT_SECONDS = _parse_parser_load_timeout(
    os.environ.get("CRG_PARSER_LOAD_TIMEOUT_SECONDS")
)
# Languages whose parser probe or load has failed; shared by all parser
# instances in this process so a failing language is not retried.
_UNAVAILABLE_LANGUAGES: set[str] = set()


def _parser_load_probe_succeeds(
    language: str,
    timeout_seconds: float = _PARSER_LOAD_TIMEOUT_SECONDS,
) -> bool:
    """Return whether a tree-sitter language can be loaded without hanging.

    Some binary bindings in tree-sitter-language-pack can hang during native
    module import on specific platform/Python combinations. Probe in a child
    process first so the main build can skip only that language instead of
    blocking the whole repository graph build until an outer timeout kills it.

    Args:
        language: Language name handed to ``get_parser`` in the child process.
        timeout_seconds: How long to wait for the child before giving up.

    Returns:
        ``True`` when the child exits with status 0 within the timeout;
        ``False`` when it times out, cannot be started, or exits non-zero.
    """
    code = (
        "from tree_sitter_language_pack import get_parser\n"
        "import sys\n"
        "get_parser(sys.argv[1])\n"
    )
    try:
        completed = subprocess.run(
            [sys.executable, "-c", code, language],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
            # A crashing native module may write bytes that are not valid in
            # the locale encoding; never let decoding them abort the probe.
            errors="replace",
            timeout=timeout_seconds,
            check=False,
        )
    except (subprocess.TimeoutExpired, OSError, ValueError) as exc:
        # ValueError covers arguments subprocess rejects outright (for
        # example an embedded NUL in the language name); such a language
        # cannot be loaded either, so report it as a failed probe.
        logger.debug("tree-sitter parser probe failed for %s: %s", language, exc)
        return False
    if completed.returncode != 0:
        logger.debug(
            "tree-sitter parser probe unavailable for %s: %s",
            language,
            completed.stderr.strip(),
        )
        return False
    return True


# ---------------------------------------------------------------------------
# Data models for extracted entities
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -788,11 +832,20 @@ def __init__(self) -> None:
self._dart_pubspec_cache: dict[tuple[str, str], Optional[Path]] = {}

def _get_parser(self, language: str):  # type: ignore[arg-type]
    """Return the cached tree-sitter parser for *language*, loading it once.

    Returns ``None`` when the language is already known to be unavailable,
    when the child-process load probe fails, or when loading the parser in
    this process raises. In the latter two cases the language is recorded in
    ``_UNAVAILABLE_LANGUAGES`` so later calls return immediately.
    """
    if language in _UNAVAILABLE_LANGUAGES:
        return None

    # Fast path: this instance has already loaded the parser.
    if language in self._parsers:
        return self._parsers[language]

    probe_ok = _parser_load_probe_succeeds(language)
    if not probe_ok:
        _UNAVAILABLE_LANGUAGES.add(language)
        logger.warning("Skipping unavailable tree-sitter parser for %s", language)
        return None

    try:
        import tree_sitter_language_pack as tslp

        loaded = tslp.get_parser(language)  # type: ignore[arg-type]
    except (LookupError, ValueError, ImportError) as exc:
        # language not packaged, or grammar load failed
        _UNAVAILABLE_LANGUAGES.add(language)
        logger.debug("tree-sitter parser unavailable for %s: %s", language, exc)
        return None

    self._parsers[language] = loaded
    return loaded
Expand Down
19 changes: 19 additions & 0 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,36 @@
"""Tests for the Tree-sitter parser module."""

import subprocess
import tempfile
from pathlib import Path

from code_review_graph import parser as parser_module
from code_review_graph.parser import CodeParser

FIXTURES = Path(__file__).parent / "fixtures"


class TestCodeParser:
def setup_method(self):
    """Empty the module-level unavailable-language set and build a fresh parser."""
    # Clear in place so the set object referenced by the parser module is
    # reused rather than replaced.
    parser_module._UNAVAILABLE_LANGUAGES.clear()
    self.parser = CodeParser()

def teardown_method(self):
    """Empty the module-level unavailable-language set after each test."""
    parser_module._UNAVAILABLE_LANGUAGES.clear()

def test_parser_probe_timeout_marks_language_unavailable(self, monkeypatch):
    """A probe that times out must mark the language unavailable, not hang the build."""

    def always_time_out(*run_args, **run_kwargs):
        # Stand-in for subprocess.run that behaves like a child which never
        # finishes loading the native binding.
        raise subprocess.TimeoutExpired(
            cmd=run_args[0],
            timeout=run_kwargs.get("timeout"),
        )

    # Swap in an empty registry so the final equality check is exact.
    monkeypatch.setattr(parser_module, "_UNAVAILABLE_LANGUAGES", set())
    monkeypatch.setattr(parser_module.subprocess, "run", always_time_out)

    probe_result = parser_module._parser_load_probe_succeeds("tsx", timeout_seconds=0.01)
    assert probe_result is False

    tsx_parser = CodeParser()._get_parser("tsx")
    assert tsx_parser is None
    assert parser_module._UNAVAILABLE_LANGUAGES == {"tsx"}

def test_detect_language_python(self):
    """A ``.py`` path is detected as Python."""
    detected = self.parser.detect_language(Path("foo.py"))
    assert detected == "python"

Expand Down
Loading