-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Fix Issue where libclang Does Not Include Standard Compiler Specific Headers #153746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-clang Author: Fristender (Fristender) Changes: As discussed in #54626, sometimes libclang is compiled without the standard compiler-specific headers. This results in the libclang Python bindings consistently not returning the initializer child cursor for VAR_DECL cursors if a NULL is present anywhere in the initializer. This is because NULL and other facilities are defined in the standard compiler-specific headers. Full diff: https://github.com/llvm/llvm-project/pull/153746.diff 2 Files Affected:
diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py
index 812ad2cd2dc13..4b662ad331047 100644
--- a/clang/bindings/python/clang/cindex.py
+++ b/clang/bindings/python/clang/cindex.py
@@ -83,6 +83,8 @@
import os
import sys
+import subprocess
+import platform
from enum import Enum
from typing import (
@@ -3335,6 +3337,23 @@ def from_source(
if index is None:
index = Index.create()
+ # Automatically include builtin headers if enabled
+ if Config.auto_include_builtin_headers:
+ builtin_include_path = Config.get_builtin_include_path()
+ if builtin_include_path:
+ # Check if include path is already specified
+ has_include_path = any(
+ arg == '-I' and i + 1 < len(args) and builtin_include_path in args[i + 1]
+ for i, arg in enumerate(args)
+ ) or any(
+ arg.startswith('-I') and builtin_include_path in arg[2:]
+ for arg in args
+ )
+
+ if not has_include_path:
+ # Add the builtin include path
+ args = ['-I', builtin_include_path] + list(args)
+
args_array = None
if len(args) > 0:
args_array = (c_char_p * len(args))(*[b(x) for x in args])
@@ -4309,6 +4328,8 @@ class Config:
library_file: str | None = None
compatibility_check = True
loaded = False
+ auto_include_builtin_headers = True
+ _builtin_include_path: str | None = None
@staticmethod
def set_library_path(path: StrPath) -> None:
@@ -4358,6 +4379,138 @@ def set_compatibility_check(check_status: bool) -> None:
Config.compatibility_check = check_status
+ @staticmethod
+ def set_auto_include_builtin_headers(enable: bool) -> None:
+ """Enable/disable automatic inclusion of builtin clang headers.
+
+ When enabled (default), the Python bindings will automatically detect
+ and include the builtin clang headers (such as stddef.h, stdint.h, etc.)
+ that contain essential macros like NULL, offsetof, etc. This prevents
+ issues where these macros are not recognized during parsing.
+
+ Parameters:
+ enable -- True to automatically include builtin headers, False to disable
+ """
+ if Config.loaded:
+ raise Exception(
+ "auto_include_builtin_headers must be set before using "
+ "any other functionalities in libclang."
+ )
+
+ Config.auto_include_builtin_headers = enable
+
+ @staticmethod
+ def get_builtin_include_path() -> str | None:
+ """Get the path to clang's builtin headers.
+
+ Returns the path to clang's builtin include directory, or None if not found.
+ This path contains essential headers like stddef.h that define macros such as NULL.
+ """
+ if Config._builtin_include_path is not None:
+ return Config._builtin_include_path
+
+ # Try multiple strategies to find clang's builtin headers
+ candidates = []
+
+ # Strategy 1: Query clang directly for its resource directory
+ try:
+ result = subprocess.run(
+ ['clang', '-print-resource-dir'],
+ capture_output=True, text=True, timeout=10
+ )
+ if result.returncode == 0:
+ resource_dir = result.stdout.strip()
+ include_dir = os.path.join(resource_dir, 'include')
+ candidates.append(include_dir)
+ except (subprocess.SubprocessError, OSError, subprocess.TimeoutExpired):
+ pass
+
+ # Strategy 2: Try clang version-based paths
+ try:
+ result = subprocess.run(
+ ['clang', '--version'],
+ capture_output=True, text=True, timeout=10
+ )
+ if result.returncode == 0:
+ # Extract version from output like "clang version 19.1.7"
+ for line in result.stdout.splitlines():
+ if 'clang version' in line.lower():
+ parts = line.split()
+ for part in parts:
+ if part and part[0].isdigit():
+ major_version = part.split('.')[0]
+ # Common paths on different systems
+ candidates.extend([
+ f"/usr/lib/clang/{major_version}/include",
+ f"/usr/local/lib/clang/{major_version}/include",
+ f"/opt/homebrew/lib/clang/{major_version}/include", # macOS Homebrew
+ f"/usr/lib/llvm-{major_version}/lib/clang/{major_version}/include", # Ubuntu
+ ])
+ break
+ break
+ except (subprocess.SubprocessError, OSError, subprocess.TimeoutExpired):
+ pass
+
+ # Strategy 3: Check LLVM source tree locations (for developers working with source)
+ # Try to detect if we're running from within an LLVM source tree
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ # Navigate up to find the LLVM project root
+ llvm_project_roots = []
+ check_dir = current_dir
+ for _ in range(10): # Don't go more than 10 levels up
+ if os.path.basename(check_dir) in ['llvm-project', 'llvm']:
+ llvm_project_roots.append(check_dir)
+ parent = os.path.dirname(check_dir)
+ if parent == check_dir: # Reached root
+ break
+ check_dir = parent
+
+ # Also check common relative paths from current location
+ possible_roots = [
+ os.path.join(current_dir, '..', '..', '..', '..'), # From clang/bindings/python/clang
+ os.path.join(current_dir, '..', '..', '..'),
+ os.path.join(current_dir, '..', '..'),
+ ]
+
+ for root in llvm_project_roots + possible_roots:
+ if os.path.exists(root):
+ # Check for clang/lib/Headers in the source tree
+ headers_path = os.path.join(root, 'clang', 'lib', 'Headers')
+ if os.path.exists(headers_path):
+ candidates.append(headers_path)
+
+ # Strategy 4: Check common installation paths
+ system = platform.system()
+ if system == "Windows":
+ # On Windows, check common LLVM installation paths
+ program_files_paths = [
+ os.environ.get('ProgramFiles', r'C:\Program Files'),
+ os.environ.get('ProgramFiles(x86)', r'C:\Program Files (x86)'),
+ ]
+ for pf in program_files_paths:
+ if pf and os.path.exists(pf):
+ llvm_base = os.path.join(pf, 'LLVM')
+ if os.path.exists(llvm_base):
+ for item in os.listdir(llvm_base):
+ lib_path = os.path.join(llvm_base, item, 'lib', 'clang')
+ if os.path.exists(lib_path):
+ for version in os.listdir(lib_path):
+ include_path = os.path.join(lib_path, version, 'include')
+ candidates.append(include_path)
+
+ # Find the first existing candidate
+ for candidate in candidates:
+ if candidate and os.path.isdir(candidate):
+ # Verify it contains stddef.h as a sanity check
+ stddef_path = os.path.join(candidate, 'stddef.h')
+ if os.path.isfile(stddef_path):
+ Config._builtin_include_path = candidate
+ return candidate
+
+ # If nothing found, cache the negative result
+ Config._builtin_include_path = ""
+ return None
+
@CachedProperty
def lib(self) -> CDLL:
lib = self.get_cindex_library()
diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake
index a523cc561b3f9..685f1240d4b2b 100644
--- a/clang/cmake/caches/Release.cmake
+++ b/clang/cmake/caches/Release.cmake
@@ -84,6 +84,8 @@ if (LLVM_RELEASE_ENABLE_PGO)
set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE STRING "")
set(BOOTSTRAP_LLVM_ENABLE_RUNTIMES "compiler-rt" CACHE STRING "")
set(BOOTSTRAP_LLVM_ENABLE_PROJECTS "clang;lld" CACHE STRING "")
+ # Ensure clang resource headers are available in instrumented stage
+ set(BOOTSTRAP_CLANG_RESOURCE_DIR "" CACHE STRING "")
else()
if (LLVM_RELEASE_ENABLE_LTO)
@@ -109,6 +111,8 @@ endif()
set(LLVM_ENABLE_RUNTIMES ${STAGE1_RUNTIMES} CACHE STRING "")
set(LLVM_ENABLE_PROJECTS ${STAGE1_PROJECTS} CACHE STRING "")
set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE STRING "")
+# Ensure clang resource headers are properly embedded for standalone libclang
+set(CLANG_RESOURCE_DIR "" CACHE STRING "")
# stage2-instrumented and Final Stage Config:
# Options that need to be set in both the instrumented stage (if we are doing
@@ -120,6 +124,8 @@ if (LLVM_RELEASE_ENABLE_LTO)
endif()
set_instrument_and_final_stage_var(LLVM_ENABLE_LIBCXX "ON" BOOL)
set_instrument_and_final_stage_var(LLVM_STATIC_LINK_CXX_STDLIB "ON" BOOL)
+# Ensure clang resource headers are properly embedded in all stages for standalone libclang
+set_instrument_and_final_stage_var(CLANG_RESOURCE_DIR "" STRING)
set(RELEASE_LINKER_FLAGS "-rtlib=compiler-rt --unwindlib=libunwind")
if(NOT ${CMAKE_HOST_SYSTEM_NAME} MATCHES "Darwin")
set(RELEASE_LINKER_FLAGS "${RELEASE_LINKER_FLAGS} -static-libgcc")
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately I am not at all familiar with the systems side, so I can't really comment on the header search strategies you implemented or on whether this is the appropriate solution.
@Endilll are you perhaps familiar with this, or would it be better to ask Aaron?
You can test this locally with the following command:
darker --check --diff -r HEAD~1...HEAD clang/bindings/python/clang/cindex.py
View the diff from darker here.
--- cindex.py 2025-08-15 04:20:51.000000 +0000
+++ cindex.py 2025-08-15 21:17:08.418355 +0000
@@ -3341,20 +3341,22 @@
if Config.auto_include_builtin_headers:
builtin_include_path = Config.get_builtin_include_path()
if builtin_include_path:
# Check if include path is already specified
has_include_path = any(
- arg == '-I' and i + 1 < len(args) and builtin_include_path in args[i + 1]
+ arg == "-I"
+ and i + 1 < len(args)
+ and builtin_include_path in args[i + 1]
for i, arg in enumerate(args)
) or any(
- arg.startswith('-I') and builtin_include_path in arg[2:]
+ arg.startswith("-I") and builtin_include_path in arg[2:]
for arg in args
)
-
+
if not has_include_path:
# Add the builtin include path
- args = ['-I', builtin_include_path] + list(args)
+ args = ["-I", builtin_include_path] + list(args)
args_array = None
if len(args) > 0:
args_array = (c_char_p * len(args))(*[b(x) for x in args])
@@ -4380,135 +4382,142 @@
Config.compatibility_check = check_status
@staticmethod
def set_auto_include_builtin_headers(enable: bool) -> None:
"""Enable/disable automatic inclusion of builtin clang headers.
-
+
When enabled (default), the Python bindings will automatically detect
and include the builtin clang headers (such as stddef.h, stdint.h, etc.)
that contain essential macros like NULL, offsetof, etc. This prevents
issues where these macros are not recognized during parsing.
-
+
Parameters:
enable -- True to automatically include builtin headers, False to disable
"""
if Config.loaded:
raise Exception(
"auto_include_builtin_headers must be set before using "
"any other functionalities in libclang."
)
-
+
Config.auto_include_builtin_headers = enable
@staticmethod
def get_builtin_include_path() -> str | None:
"""Get the path to clang's builtin headers.
-
+
Returns the path to clang's builtin include directory, or None if not found.
This path contains essential headers like stddef.h that define macros such as NULL.
"""
if Config._builtin_include_path is not None:
return Config._builtin_include_path
-
+
# Try multiple strategies to find clang's builtin headers
candidates = []
-
+
# Strategy 1: Query clang directly for its resource directory
try:
result = subprocess.run(
- ['clang', '-print-resource-dir'],
- capture_output=True, text=True, timeout=10
+ ["clang", "-print-resource-dir"],
+ capture_output=True,
+ text=True,
+ timeout=10,
)
if result.returncode == 0:
resource_dir = result.stdout.strip()
- include_dir = os.path.join(resource_dir, 'include')
+ include_dir = os.path.join(resource_dir, "include")
candidates.append(include_dir)
except (subprocess.SubprocessError, OSError, subprocess.TimeoutExpired):
pass
-
+
# Strategy 2: Try clang version-based paths
try:
result = subprocess.run(
- ['clang', '--version'],
- capture_output=True, text=True, timeout=10
+ ["clang", "--version"], capture_output=True, text=True, timeout=10
)
if result.returncode == 0:
# Extract version from output like "clang version 19.1.7"
for line in result.stdout.splitlines():
- if 'clang version' in line.lower():
+ if "clang version" in line.lower():
parts = line.split()
for part in parts:
if part and part[0].isdigit():
- major_version = part.split('.')[0]
+ major_version = part.split(".")[0]
# Common paths on different systems
- candidates.extend([
- f"/usr/lib/clang/{major_version}/include",
- f"/usr/local/lib/clang/{major_version}/include",
- f"/opt/homebrew/lib/clang/{major_version}/include", # macOS Homebrew
- f"/usr/lib/llvm-{major_version}/lib/clang/{major_version}/include", # Ubuntu
- ])
+ candidates.extend(
+ [
+ f"/usr/lib/clang/{major_version}/include",
+ f"/usr/local/lib/clang/{major_version}/include",
+ f"/opt/homebrew/lib/clang/{major_version}/include", # macOS Homebrew
+ f"/usr/lib/llvm-{major_version}/lib/clang/{major_version}/include", # Ubuntu
+ ]
+ )
break
break
except (subprocess.SubprocessError, OSError, subprocess.TimeoutExpired):
pass
-
+
# Strategy 3: Check LLVM source tree locations (for developers working with source)
# Try to detect if we're running from within an LLVM source tree
current_dir = os.path.dirname(os.path.abspath(__file__))
# Navigate up to find the LLVM project root
llvm_project_roots = []
check_dir = current_dir
for _ in range(10): # Don't go more than 10 levels up
- if os.path.basename(check_dir) in ['llvm-project', 'llvm']:
+ if os.path.basename(check_dir) in ["llvm-project", "llvm"]:
llvm_project_roots.append(check_dir)
parent = os.path.dirname(check_dir)
if parent == check_dir: # Reached root
break
check_dir = parent
-
+
# Also check common relative paths from current location
possible_roots = [
- os.path.join(current_dir, '..', '..', '..', '..'), # From clang/bindings/python/clang
- os.path.join(current_dir, '..', '..', '..'),
- os.path.join(current_dir, '..', '..'),
+ os.path.join(
+ current_dir, "..", "..", "..", ".."
+ ), # From clang/bindings/python/clang
+ os.path.join(current_dir, "..", "..", ".."),
+ os.path.join(current_dir, "..", ".."),
]
-
+
for root in llvm_project_roots + possible_roots:
if os.path.exists(root):
# Check for clang/lib/Headers in the source tree
- headers_path = os.path.join(root, 'clang', 'lib', 'Headers')
+ headers_path = os.path.join(root, "clang", "lib", "Headers")
if os.path.exists(headers_path):
candidates.append(headers_path)
-
+
# Strategy 4: Check common installation paths
system = platform.system()
if system == "Windows":
# On Windows, check common LLVM installation paths
program_files_paths = [
- os.environ.get('ProgramFiles', r'C:\Program Files'),
- os.environ.get('ProgramFiles(x86)', r'C:\Program Files (x86)'),
+ os.environ.get("ProgramFiles", r"C:\Program Files"),
+ os.environ.get("ProgramFiles(x86)", r"C:\Program Files (x86)"),
]
for pf in program_files_paths:
if pf and os.path.exists(pf):
- llvm_base = os.path.join(pf, 'LLVM')
+ llvm_base = os.path.join(pf, "LLVM")
if os.path.exists(llvm_base):
for item in os.listdir(llvm_base):
- lib_path = os.path.join(llvm_base, item, 'lib', 'clang')
+ lib_path = os.path.join(llvm_base, item, "lib", "clang")
if os.path.exists(lib_path):
for version in os.listdir(lib_path):
- include_path = os.path.join(lib_path, version, 'include')
+ include_path = os.path.join(
+ lib_path, version, "include"
+ )
candidates.append(include_path)
-
+
# Find the first existing candidate
for candidate in candidates:
if candidate and os.path.isdir(candidate):
# Verify it contains stddef.h as a sanity check
- stddef_path = os.path.join(candidate, 'stddef.h')
+ stddef_path = os.path.join(candidate, "stddef.h")
if os.path.isfile(stddef_path):
Config._builtin_include_path = candidate
return candidate
-
+
# If nothing found, cache the negative result
Config._builtin_include_path = ""
return None
@CachedProperty
|
|
If we're going to explicitly add a flag for the resource dir, we should use the flag `-resource-dir`. We have code in libclang which is supposed to handle this; clang_parseTranslationUnit_Impl calls getClangResourcesPath() to try to compute the path to the resource dir. I'm not sure if we're computing the right path, though, or if we actually pass it down to the driver correctly. |
As discussed in #54626, sometimes libclang is compiled without the standard compiler-specific headers. This results in the libclang python bindings consistently not returning the initializer child cursor for VAR_DECL cursors if a NULL is present anywhere in the initializer. This is because NULL and other facilities are defined in the standard compiler-specific headers.
With this fix, libclang works as expected.
This pull request resolves #54626.