Skip to content
26 changes: 26 additions & 0 deletions clang/docs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,32 @@ if (LLVM_ENABLE_SPHINX)
gen_rst_file_from_td(DiagnosticsReference.rst -gen-diag-docs ../include/clang/Basic/Diagnostic.td "${docs_targets}")
gen_rst_file_from_td(ClangCommandLineReference.rst -gen-opt-docs ../include/clang/Driver/ClangOptionDocs.td "${docs_targets}")

# Generate analyzer/user-docs/Options.rst (the reference page of the static
# analyzer configuration options). The Python script reads the option
# definitions from AnalyzerOptions.def, substitutes them into the
# Options.rst.in template and writes the result below the build directory.
set(docs_tools_dir "${CMAKE_CURRENT_SOURCE_DIR}/tools")
set(aopts_rst_rel_path analyzer/user-docs/Options.rst)
set(aopts_rst "${CMAKE_CURRENT_BINARY_DIR}/${aopts_rst_rel_path}")
set(analyzeroptions_def "${CMAKE_CURRENT_SOURCE_DIR}/../include/clang/StaticAnalyzer/Core/AnalyzerOptions.def")
set(aopts_rst_in "${CMAKE_CURRENT_SOURCE_DIR}/${aopts_rst_rel_path}.in")
set(generate_aopts_docs generate_analyzer_options_docs.py)
add_custom_command(
  OUTPUT "${aopts_rst}"
  COMMAND ${Python3_EXECUTABLE} "${generate_aopts_docs}"
          -i "${analyzeroptions_def}"
          -t "${aopts_rst_in}"
          -o "${aopts_rst}"
  WORKING_DIRECTORY "${docs_tools_dir}"
  VERBATIM
  COMMENT "Generating ${aopts_rst}"
  # Every file the script reads must be listed here, otherwise editing it
  # does not trigger a regeneration of the documentation page.
  DEPENDS "${docs_tools_dir}/${generate_aopts_docs}"
          "${analyzeroptions_def}"
          "${aopts_rst_in}"
          copy-clang-rst-docs
)
add_custom_target(generate-analyzer-options-rst DEPENDS "${aopts_rst}")
foreach(target ${docs_targets})
  add_dependencies(${target} generate-analyzer-options-rst)
endforeach()

# Technically this is redundant because generate-analyzer-options-rst
# depends on the copy operation (it drops a generated file into a
# subdirectory of the copied tree), but the explicit dependency is kept for
# the sake of clarity.
foreach(target ${docs_targets})
  add_dependencies(${target} copy-clang-rst-docs)
endforeach()
Expand Down
1 change: 1 addition & 0 deletions clang/docs/analyzer/user-docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Contents:

user-docs/Installation
user-docs/CommandLineUsage
user-docs/Options
user-docs/UsingWithXCode
user-docs/FilingBugs
user-docs/CrossTranslationUnit
Expand Down
2 changes: 2 additions & 0 deletions clang/docs/analyzer/user-docs/CommandLineUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,8 @@ When compiling your application to run on the simulator, it is important that **

If you aren't certain which compiler Xcode uses to build your project, try just running ``xcodebuild`` (without **scan-build**). You should see the full path to the compiler that Xcode is using, and use that as an argument to ``--use-cc``.

.. _command-line-usage-CodeChecker:

CodeChecker
-----------

Expand Down
102 changes: 102 additions & 0 deletions clang/docs/analyzer/user-docs/Options.rst.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
========================
Configuring the Analyzer
========================

The clang static analyzer supports two kinds of options:

1. Global **analyzer options** influence the behavior of the analyzer engine.
They are documented on this page, in the section :ref:`List of analyzer
options<list-of-analyzer-options>`.
2. The **checker options** belong to individual checkers (e.g.
``core.BitwiseShift:Pedantic`` and ``unix.Stream:Pedantic`` are completely
separate options) and customize the behavior of that particular checker.
These are documented within the documentation of each individual checker at
:doc:`../checkers`.

Assigning values to options
===========================

With the compiler frontend
--------------------------

All options can be configured by using the ``-analyzer-config`` flag of ``clang
-cc1`` (the so-called *compiler frontend* part of clang). The values of the
options are specified with the syntax ``-analyzer-config
OPT=VAL,OPT2=VAL2,...`` which supports specifying multiple options, but
separate flags like ``-analyzer-config OPT=VAL -analyzer-config OPT2=VAL2`` are
also accepted (with equivalent behavior). Analyzer options and checker options
can be freely intermixed here because it's easy to recognize that checker
option names are always prefixed with ``some.groups.NameOfChecker:``.

With the clang driver
---------------------

In a conventional workflow ``clang -cc1`` (which is a low-level internal
interface) is invoked indirectly by the clang *driver* (i.e. plain ``clang``
without the ``-cc1`` flag), which acts as an "even more frontend" wrapper layer
around the ``clang -cc1`` *compiler frontend*. In this situation **each**
command line argument intended for the *compiler frontend* must be prefixed
with ``-Xclang``.

For example, the following command analyzes ``foo.c`` in :ref:`shallow mode
<analyzer-option-mode>` with :ref:`loop unrolling
<analyzer-option-unroll-loops>`:

::

clang --analyze -Xclang -analyzer-config -Xclang mode=shallow,unroll-loops=true foo.c

When this is executed, the *driver* will compose and execute the following
``clang -cc1`` command (which can be inspected by passing the ``-v`` flag to
the *driver*):

::

clang -cc1 -analyze [...] -analyzer-config mode=shallow,unroll-loops=true foo.c

Here ``[...]`` stands for dozens of low-level flags which ensure that ``clang
-cc1`` does the right thing (e.g. ``-fcolor-diagnostics`` when it's suitable;
``-analyzer-checker`` flags to enable a sane default set of checkers). Also
note the distinction that the ``clang`` *driver* requires ``--analyze`` (double
dashes) while the ``clang -cc1`` *compiler frontend* requires ``-analyze``
(single dash).

With CodeChecker
----------------

If the analysis is performed through :ref:`CodeChecker
<command-line-usage-CodeChecker>` (which e.g. supports the analysis of a whole
project instead of a single file) then it will act as another indirection
layer. CodeChecker provides separate command-line flags called
``--analyzer-config`` (for analyzer options) and ``--checker-config`` (for
checker options):

::

CodeChecker analyze -o outdir --checker-config clangsa:unix.Stream:Pedantic=true \
--analyzer-config clangsa:mode=shallow clangsa:unroll-loops=true \
-- compile_commands.json

These CodeChecker flags may be followed by multiple ``OPT=VAL`` pairs as
separate arguments (and this is why the example needs to use ``--`` before
``compile_commands.json``). The option names are all prefixed with ``clangsa:``
to ensure that they are passed to the clang static analyzer (and not other
analyzer tools that are also supported by CodeChecker).

.. _list-of-analyzer-options:

List of analyzer options
========================

.. warning::
These options are primarily intended for development purposes. Changing
their values may drastically alter the behavior of the analyzer, and may
even result in instabilities or crashes!

..
The contents of this section are automatically generated by the script
clang/docs/tools/generate_analyzer_options_docs.py from the header file
AnalyzerOptions.def to ensure that the RST/web documentation is synchronized
with the command line help options.

.. OPTIONS_LIST_PLACEHOLDER
242 changes: 242 additions & 0 deletions clang/docs/tools/generate_analyzer_options_docs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/usr/bin/env python3
# A tool to automatically generate documentation for the config options of the
# clang static analyzer by reading `AnalyzerOptions.def`.

import argparse
from collections import namedtuple
from enum import Enum, auto
import re
import sys
import textwrap


# The following code implements a trivial parser for the narrow subset of C++
# which is used in AnalyzerOptions.def. This supports the following features:
# - ignores preprocessor directives, even if they are continued with \ at EOL
# - ignores comments: both /* ... */ and // ...
# - parses string literals (even if they contain \" escapes)
# - concatenates adjacent string literals
# - parses numbers even if they contain ' as a thousands separator
# - recognizes MACRO(arg1, arg2, ..., argN) calls


class TT(Enum):
    """Kinds of tokens that the tokenizer can emit."""

    number = auto()  # numeric literal
    ident = auto()  # identifier / word
    string = auto()  # double-quoted string literal
    punct = auto()  # one of the characters ( ) ,


# Table of (compiled regex, token kind) pairs that drives tokenize().
# The entries are tried in this order at the current position and the first
# regex that matches wins, so the order is significant. Entries whose kind is
# None are consumed but produce no token.
TOKENS = [
    # Optional minus sign, then digits and ' separators. This must precede the
    # identifier entry because \w+ would match a run of digits as well.
    (re.compile(r"-?[0-9']+"), TT.number),
    # A run of word characters (letters, digits, underscore).
    (re.compile(r"\w+"), TT.ident),
    # Double-quoted string; a backslash escapes the character that follows it.
    (re.compile(r'"([^\\"]|\\.)*"'), TT.string),
    # The only punctuation needed for macro calls: parentheses and comma.
    (re.compile(r"[(),]"), TT.punct),
    (re.compile(r"/\*((?!\*/).)*\*/", re.S), None),  # C-style comment
    (re.compile(r"//.*\n"), None),  # C++ style oneline comment
    (re.compile(r"#.*(\\\n.*)*(?<!\\)\n"), None),  # preprocessor directive
    (re.compile(r"\s+"), None),  # whitespace
]

# A lexed token: `kind` is a member of TT and `code` is the matched source
# text (for strings this still includes the surrounding quotes).
Token = namedtuple("Token", "kind code")


def report_unexpected(s, pos):
    """Print a diagnostic about the character `s[pos]` to stderr.

    The message contains the 1-based line and column of the offending
    character within `s`.
    """
    # Appending a marker character makes the length of the last line equal to
    # the 1-based column of the position.
    preceding = s[:pos] + "X"
    lineno = preceding.count("\n") + 1
    col = len(preceding) - (preceding.rfind("\n") + 1)
    message = (
        "unexpected character %r in AnalyzerOptions.def at line %d column %d"
        % (s[pos], lineno, col)
    )
    print(message, file=sys.stderr)


def tokenize(s):
    """Split the text `s` into a list of Token objects.

    At every position the regexes of TOKENS are tried in order and the first
    match is consumed; matches whose kind is None (comments, preprocessor
    directives, whitespace) are dropped. A character that no regex matches is
    reported with report_unexpected() and skipped.
    """
    tokens = []
    offset, length = 0, len(s)
    while offset < length:
        matched = None
        matched_kind = None
        for pattern, token_kind in TOKENS:
            matched = pattern.match(s, offset)
            if matched is not None:
                matched_kind = token_kind
                break

        if matched is None:
            # Nothing recognizes this character: complain and move on.
            report_unexpected(s, offset)
            offset += 1
            continue

        if matched_kind is not None:
            tokens.append(Token(matched_kind, matched.group(0)))
        offset = matched.end()
    return tokens


def join_strings(tokens):
    """Return a copy of `tokens` in which adjacent string tokens are merged.

    This mirrors the concatenation of adjacent string literals in C++: the
    closing quote of the earlier literal and the opening quote of the later
    one are removed and the rest is glued together into one string token.
    """
    merged = []
    for token in tokens:
        follows_string = (
            token.kind == TT.string and merged and merged[-1].kind == TT.string
        )
        if not follows_string:
            merged.append(token)
            continue
        previous = merged.pop()
        merged.append(Token(TT.string, previous.code[:-1] + token.code[1:]))
    return merged


# A recognized macro invocation: `name` is the macro name and `args` is the
# list of its argument tokens.
MacroCall = namedtuple("MacroCall", ["name", "args"])


class State(Enum):
    """States of the state machine that recognizes macro calls."""

    init = auto()  # outside of any macro call
    after_ident = auto()  # just saw a macro name
    before_arg = auto()  # just saw "(" or ",", an argument follows
    after_arg = auto()  # just saw an argument


def get_calls(tokens, macro_names):
    """Collect the calls of the given macros from a token sequence.

    `tokens` is an iterable of Token objects and `macro_names` is a container
    of the macro names to look for. The result is a list of MacroCall objects
    in order of appearance; every argument of a call is a single token.
    A token that does not fit the expected `NAME ( arg , arg ... )` shape
    aborts the call that is being parsed.
    """
    calls = []
    pending = None
    state = State.init
    open_paren = Token(TT.punct, "(")

    for tok in tokens:
        abort = False
        if state == State.init:
            if tok.kind == TT.ident and tok.code in macro_names:
                pending = MacroCall(tok.code, [])
                state = State.after_ident
            else:
                abort = True
        elif state == State.after_ident:
            if tok == open_paren:
                state = State.before_arg
            else:
                abort = True
        elif state == State.before_arg:
            # Whatever token comes here is taken as the argument.
            if pending is not None:
                pending.args.append(tok)
                state = State.after_arg
        elif state == State.after_arg:
            if tok.kind != TT.punct:
                abort = True
            elif tok.code == ")":
                calls.append(pending)
                pending = None
                state = State.init
            elif tok.code == ",":
                state = State.before_arg

        if abort:
            pending = None
            state = State.init
    return calls


# Names of the macros whose calls are turned into documentation, mapped to
# the number of arguments that each of them takes:
#   ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL)
#   ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC,
#                                        SHALLOW_VAL, DEEP_VAL)

MACRO_NAMES_ARGCOUNTS = dict(
    ANALYZER_OPTION=5,
    ANALYZER_OPTION_DEPENDS_ON_USER_MODE=6,
)


def string_value(tok):
    """Return the text denoted by the string token `tok`.

    The surrounding quotes are removed and every backslash escape is replaced
    by the character that follows the backslash.

    Raises ValueError if `tok` is not a string token.
    """
    if tok.kind == TT.string:
        unquoted = tok.code[1:-1]
        return re.sub(r"\\(.)", r"\1", unquoted)
    raise ValueError(f"expected a string token, got {tok.kind.name}")


def cmdflag_to_rst_title(cmdflag_tok):
    """Build the RST heading of one option from its command line flag token.

    The result is a link target named `analyzer-option-<flag>` followed by the
    flag as a title underlined with dashes, and it ends with a blank line.
    """
    flag = string_value(cmdflag_tok)
    pieces = [
        f".. _analyzer-option-{flag}:",
        "",
        flag,
        "-" * len(flag),
        "",
        "",
    ]
    return "\n".join(pieces)


def desc_to_rst_paragraphs(tok):
    """Convert the description string token of an option into RST paragraphs.

    The text is wrapped to 80 columns. A trailing "Value: ..." part becomes a
    separate "Accepted values:" paragraph; otherwise a trailing
    "Example file content: ..." part becomes a preformatted block.
    """
    # This particular star would start inline emphasis in RST, so escape it.
    text = string_value(tok).replace("ctu-max-nodes-*", r"ctu-max-nodes-\*")

    literal_block = ""
    value_match = re.search(r"(^|\s)Value:", text)
    if value_match:
        # Terse "Value: a, b, c" suffix of the command line help.
        blocks = [
            text[: value_match.start()],
            "Accepted values:" + text[value_match.end() :],
        ]
    else:
        example_match = re.search(r"\s*Example file.content:", text)
        if example_match:
            # The example is emitted verbatim instead of being re-wrapped.
            blocks = [text[: example_match.start()]]
            literal_block = (
                "Example file content::\n\n " + text[example_match.end() :] + "\n\n"
            )
        else:
            blocks = [text]

    filled = []
    for block in blocks:
        if block.strip():
            filled.append(textwrap.fill(block, width=80))
    filled.append("")  # makes the joined text end with a blank line

    return "\n\n".join(filled) + literal_block


def default_to_rst(tok):
    """Render the token of a default value as text for the documentation.

    Numbers lose their ' separators, the empty string literal is spelled out
    as "(empty string)", other strings (with their quotes) and identifiers
    are shown as they appear in the source.

    Raises ValueError for any other kind of token.
    """
    kind = tok.kind
    if kind == TT.number:
        return tok.code.replace("'", "")
    if kind == TT.ident or (kind == TT.string and tok.code != '""'):
        return tok.code
    if kind == TT.string:
        return "(empty string)"
    raise ValueError(f"unexpected token as default value: {tok.kind.name}")


def defaults_to_rst_paragraph(defaults):
    """Build the "Default value: ..." paragraph from the default value tokens.

    One token means a single default; two tokens mean the defaults used in
    shallow mode and in deep mode, in this order.

    Raises ValueError for any other token count (or for an unsupported token).
    """
    rendered = list(map(default_to_rst, defaults))
    count = len(rendered)

    if count == 1:
        (only,) = rendered
        return f"Default value: {only}\n\n"
    if count == 2:
        shallow, deep = rendered
        return f"Default value: {shallow} (in shallow mode) / {deep} (in deep mode)\n\n"
    raise ValueError("unexpected count of default values: %d" % len(defaults))


def macro_call_to_rst_paragraphs(macro_call):
    """Render the documentation of the option declared by `macro_call`.

    `macro_call` is a MacroCall whose name is a key of MACRO_NAMES_ARGCOUNTS.
    Returns the RST text (title, description, default value) of the option.
    If the call cannot be processed, a diagnostic is printed to stderr and an
    empty string is returned, so the remaining options are still documented.
    """
    expected = MACRO_NAMES_ARGCOUNTS[macro_call.name]
    actual = len(macro_call.args)
    if actual != expected:
        # Report the skipped call like every other parse problem instead of
        # silently dropping an option from the documentation.
        print(
            "skipping %s call with %d arguments (expected %d)"
            % (macro_call.name, actual, expected),
            file=sys.stderr,
        )
        return ""

    try:
        # The first two arguments (TYPE and NAME) are not used for the docs.
        _, _, cmdflag, desc, *defaults = macro_call.args

        return (
            cmdflag_to_rst_title(cmdflag)
            + desc_to_rst_paragraphs(desc)
            + defaults_to_rst_paragraph(defaults)
        )
    except ValueError as ve:
        print(ve.args[0], file=sys.stderr)
        return ""


def get_option_list(input_file):
    """Return the RST documentation of all options declared in `input_file`.

    `input_file` is the path of AnalyzerOptions.def (read as UTF-8). The
    sections of the individual options are concatenated in source order.
    """
    with open(input_file, encoding="utf-8") as def_file:
        source = def_file.read()

    calls = get_calls(join_strings(tokenize(source)), MACRO_NAMES_ARGCOUNTS)
    return "".join(map(macro_call_to_rst_paragraphs, calls))


# The line of the template that is replaced by the generated option list.
PLACEHOLDER = ".. OPTIONS_LIST_PLACEHOLDER\n"


def main():
    """Generate the options documentation as specified on the command line.

    Reads the template, replaces the placeholder line with the option list
    generated from the input file and writes the result to the output file.
    Exits with an error if the template does not contain the placeholder.
    """
    p = argparse.ArgumentParser()
    # All three paths are mandatory; without `required` a missing flag would
    # only surface later as an obscure error from open(None).
    p.add_argument("-i", "--input", required=True, help="path to AnalyzerOptions.def")
    p.add_argument("-t", "--template", required=True, help="path of template file")
    p.add_argument("-o", "--output", required=True, help="path of output file")
    opts = p.parse_args()

    with open(opts.template, encoding="utf-8") as f:
        doc_template = f.read()

    if PLACEHOLDER not in doc_template:
        # str.replace() would silently produce a page without the option list.
        sys.exit(
            "error: placeholder line %r not found in template %s"
            % (PLACEHOLDER.strip(), opts.template)
        )

    rst_output = doc_template.replace(PLACEHOLDER, get_option_list(opts.input))

    # newline="" writes the "\n" line endings unchanged on every platform.
    with open(opts.output, "w", newline="", encoding="utf-8") as f:
        f.write(rst_output)


if __name__ == "__main__":
    main()
Loading