
Commit 379123d

Improvements to CI code coverage (#796)
- Add maint/RunCoverage script to allow gathering Clang coverage locally more easily
- Add LCOV exclusion comments to unreachable code
- Add a filtering script so that the CI job respects LCOV_EXCL_* comments
1 parent b0f27ec commit 379123d

20 files changed: +519 -77 lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 40 deletions
@@ -531,7 +531,7 @@ jobs:
       - name: Setup
         run: |
           sudo apt-get -qq update
-          sudo apt-get -qq install zlib1g-dev libbz2-dev libedit-dev
+          sudo apt-get -qq install zlib1g-dev libbz2-dev libedit-dev lcov

       - name: Checkout
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
@@ -547,50 +547,13 @@ jobs:
       - name: Test
         run: |
           cd build
-          echo "== Running all tests with CTest =="
-          LLVM_PROFILE_FILE="coverage-%m.profraw" ctest -j1 --output-on-failure
-          echo ""
-          echo "== Re-running pcre2test with -malloc =="
-          LLVM_PROFILE_FILE="coverage-%m.profraw" srcdir=.. pcre2test=./pcre2test ../RunTest -malloc
-
-      - name: Report
-        run: |
-          LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`
-          echo "Using LLVM version $LLVM_VER"
-
-          # Merge the profiles gathered
-          cd build
-          llvm-profdata-$LLVM_VER merge -sparse coverage-*.profraw -o coverage.profdata
-
-          # Output HTML, for archiving and browsing later
-          llvm-cov-$LLVM_VER show \
-            -format=html -output-dir=coverage-report -show-line-counts-or-regions -show-branches=percent \
-            -instr-profile=coverage.profdata \
-            ./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
-            ../src/ ./
-
-          # Output LCOV-compatible output, for downstream tools
-          llvm-cov-$LLVM_VER export \
-            -format=lcov \
-            -instr-profile=coverage.profdata \
-            ./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
-            ../src/ ./ \
-            > ./coverage-lcov.info
-
-          # Output text summary to build log
-          echo '```' > "$GITHUB_STEP_SUMMARY"
-          llvm-cov-$LLVM_VER report \
-            -instr-profile=coverage.profdata \
-            ./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
-            ../src/ ./ \
-            >> "$GITHUB_STEP_SUMMARY"
-          echo '```' >> "$GITHUB_STEP_SUMMARY"
+          ../maint/RunCoverage

       - name: Upload report to GitHub artifacts
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
           name: "Coverage report"
-          path: './build/coverage-report'
+          path: './build/coverage-html'
           if-no-files-found: error

       - name: Upload report to Codecov
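The committed maint/RunCoverage script itself is not shown in this excerpt, but the workflow steps it replaces are. The following is a hedged sketch of what such a wrapper could look like, assuming it re-packages the commands removed above, pipes the LCOV export through maint/FilterCoverage.py, and writes HTML to ./coverage-html (the artifact path used above); the use of genhtml from the newly installed lcov package is an assumption, not something visible in this diff.

    #!/bin/sh
    # Hypothetical sketch only -- not the committed maint/RunCoverage.
    # Run from inside a Clang coverage-instrumented build directory.
    set -e

    LLVM_VER=`clang --version | head -n1 | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+' | cut -d. -f1`

    # Run the test suites with profiling enabled
    LLVM_PROFILE_FILE="coverage-%m.profraw" ctest -j1 --output-on-failure
    LLVM_PROFILE_FILE="coverage-%m.profraw" srcdir=.. pcre2test=./pcre2test ../RunTest -malloc

    # Merge the raw profiles and export LCOV data
    llvm-profdata-$LLVM_VER merge -sparse coverage-*.profraw -o coverage.profdata
    llvm-cov-$LLVM_VER export -format=lcov -instr-profile=coverage.profdata \
      ./pcre2test -object ./pcre2grep -object ./pcre2posix_test -object ./pcre2_jit_test \
      ../src/ ./ > coverage-lcov.info

    # Honour the LCOV_EXCL_* comments, then render HTML (assumed genhtml step)
    python3 ../maint/FilterCoverage.py coverage-lcov.info coverage-lcov.filtered.info
    genhtml -o coverage-html coverage-lcov.filtered.info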

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@ build-*/
 *.a
 *.gcda
 *.gcno
+*.profraw
 *.lo
 *.la
 *.pc

RunGrepTest

Lines changed: 3 additions & 0 deletions
@@ -1148,6 +1148,9 @@ checkspecial '--line-buffered --colour=auto abc /dev/null' 1
 checkspecial '--line-buffered --color abc /dev/null' 1
 checkspecial '-dskip abc .' 1
 checkspecial '-Dread -Dskip abc /dev/null' 1
+checkspecial "-f $srcdir/testdata/greplistBad /dev/null" 2
+checkspecial "(unpaired /dev/null" 2
+checkspecial "-e (unpaired1 -e (unpaired2 /dev/null" 2

 # Clean up local working files
 rm -f testNinputgrep teststderrgrep testtrygrep testtemp1grep testtemp2grep

RunGrepTest.bat

Lines changed: 3 additions & 0 deletions
@@ -1089,6 +1089,9 @@ call :checkspecial "--line-buffered --colour=auto abc nul" 1 || exit /b 1
 call :checkspecial "--line-buffered --color abc nul" 1 || exit /b 1
 call :checkspecial "-dskip abc ." 1 || exit /b 1
 call :checkspecial "-Dread -Dskip abc nul" 1 || exit /b 1
+call :checkspecial "-f %srcdir%\testdata\greplistBad nul" 2 || exit /b 1
+call :checkspecial "(unpaired nul" 2 || exit /b 1
+call :checkspecial "-e (unpaired1 -e (unpaired2 nul" 2 || exit /b 1


 :: Clean up local working files

RunTest

Lines changed: 6 additions & 1 deletion
@@ -540,9 +540,14 @@ for bmode in "$test8" "$test16" "$test32"; do
   saverc=0
   checkspecial '-C' || saverc=$?
   checkspecial '--help' || saverc=$?
+  checkspecial "$bmode testSinput" || saverc=$?
+  checkspecial "$bmode $testdata/testinputheap" || saverc=$?
   if [ $support_setstack -eq 0 ] ; then
-    checkspecial '-S 1 -t 10 testSinput' || saverc=$?
+    checkspecial "$bmode -S 1 -t 10 testSinput" || saverc=$?
   fi
+  checkspecial -LM || saverc=$?
+  checkspecial -LP || saverc=$?
+  checkspecial -LS || saverc=$?
   if [ $saverc -eq 0 ] ; then
     echo " OK"
   fi

maint/FilterCoverage.py

Lines changed: 271 additions & 0 deletions
@@ -0,0 +1,271 @@
#! /usr/bin/env python3

# Script which is a simple LCOV filter: removes DA/BRDA entries for lines marked in-source with
# "LCOV_EXCL_LINE" or "LCOV_EXCL_START"/"LCOV_EXCL_STOP".
#
# Usage: python3 FilterCoverage.py coverage-lcov.info > coverage-lcov.filtered.info

import sys
import re

def scan_exclusions(srcpath):
    """Return a set of line numbers to exclude for this source file."""
    with open(srcpath, "r", encoding="utf-8") as fh:
        text = fh.readlines()
    excl = set()
    in_block = False
    for i, line in enumerate(text, start=1):
        if "LCOV_EXCL_LINE" in line:
            excl.add(i)
        if "LCOV_EXCL_START" in line:
            in_block = True
            excl.add(i)
            continue
        if "LCOV_EXCL_STOP" in line:
            excl.add(i)
            in_block = False
            continue
        if in_block:
            excl.add(i)
    return excl

DA_RE = re.compile(r'^\s*DA:(\d+),(\d+)(,.*)?\s*$')
LF_RE = re.compile(r'^\s*LF:(\d+)\s*$')
LH_RE = re.compile(r'^\s*LH:(\d+)\s*$')
BRDA_RE = re.compile(r'^\s*BRDA:(\d+),([e\d]+),(.*),([-\d]+)\s*$')
BRF_RE = re.compile(r'^\s*BRF:(\d+)\s*$')
BRH_RE = re.compile(r'^\s*BRH:(\d+)\s*$')
FN_RE = re.compile(r'^\s*FN:(\d+),([^,\s]*)\s*$')
FNDA_RE = re.compile(r'^\s*FNDA:(\d+),([^,\s]*)\s*$')
FNF_RE = re.compile(r'^\s*FNF:(\d+)\s*$')
FNH_RE = re.compile(r'^\s*FNH:(\d+)\s*$')

def process_block(block_lines):
    """Return processed block lines with excluded DA/BRDA removed and LF/LH fixed."""
    if not block_lines:
        return block_lines
    # get SF path from first 'SF:' line (should be first)
    first = block_lines[0]
    assert first.lstrip().startswith('SF:')
    sf_path = first.split(':', 1)[1].strip()
    exclusions = scan_exclusions(sf_path)

    new_lines = []
    da_orig_found = 0
    da_orig_hit = 0
    da_new_found = 0
    da_new_hit = 0
    brda_orig_found = 0
    brda_orig_hit = 0
    brda_new_found = 0
    brda_new_hit = 0
    fnda_orig_found = 0
    fnda_orig_hit = 0
    fnda_new_found = 0
    fnda_new_hit = 0

    fn_exclusions = set()

    # Pass 1: identify FN exclusions
    for line in block_lines:
        m_fn = FN_RE.match(line)
        assert (m_fn is not None) == line.lstrip().startswith('FN:')
        if m_fn:
            fn_line = int(m_fn.group(1))
            fn_name = m_fn.group(2)
            if fn_line in exclusions:
                fn_exclusions.add(fn_name)

    # Pass 2: filter DA, BRDA, FN/FNDA; copy others verbatim
    for line in block_lines:
        m_da = DA_RE.match(line)
        assert (m_da is not None) == line.lstrip().startswith('DA:')
        if m_da:
            line_num = int(m_da.group(1))
            execution_count = int(m_da.group(2))
            da_orig_found += 1
            if execution_count > 0:
                da_orig_hit += 1
            if line_num in exclusions:
                # drop this DA line
                continue
            da_new_found += 1
            if execution_count > 0:
                da_new_hit += 1
            new_lines.append(line)
            continue
        m_brda = BRDA_RE.match(line)
        assert (m_brda is not None) == line.lstrip().startswith('BRDA:')
        if m_brda:
            brda_orig_found += 1
            taken = m_brda.group(4)
            if taken != '-' and int(taken) > 0:
                brda_orig_hit += 1
            if int(m_brda.group(1)) in exclusions:
                # drop this BRDA line
                continue
            brda_new_found += 1
            if taken != '-' and int(taken) > 0:
                brda_new_hit += 1
            new_lines.append(line)
            continue
        m_fnda = FNDA_RE.match(line)
        assert (m_fnda is not None) == line.lstrip().startswith('FNDA:')
        if m_fnda:
            fnda_orig_found += 1
            fn_name = m_fnda.group(2)
            count = int(m_fnda.group(1))
            if count > 0:
                fnda_orig_hit += 1
            if fn_name in fn_exclusions:
                # drop this FNDA line
                continue
            fnda_new_found += 1
            if count > 0:
                fnda_new_hit += 1
            new_lines.append(line)
            continue
        m_fn = FN_RE.match(line)
        assert (m_fn is not None) == line.lstrip().startswith('FN:')
        if m_fn:
            fn_line = int(m_fn.group(1))
            fn_name = m_fn.group(2)
            if fn_name in fn_exclusions:
                # drop this FN line
                continue
            new_lines.append(line)
            continue
        # other lines: append unchanged
        new_lines.append(line)

    # Pass 3: fix LF/LH, BRF/BRH, FNF/FNH
    # Mutate new_lines. If we find any LF/LH lines, check they have the expected original values.
    # If so, replace with new values. If not, print a warning.
    for i, line in enumerate(new_lines):
        # LF
        m_lf = LF_RE.match(line)
        assert (m_lf is not None) == line.lstrip().startswith('LF:')
        if m_lf:
            # preserve leading whitespace exactly
            leading = re.match(r'^(\s*)', line).group(1)
            # replace with recomputed value (number of DA entries remaining)
            new_lines[i] = f"{leading}LF:{da_new_found}\n"
            # warn if original disagreed (useful for debugging)
            try:
                lf_orig = int(m_lf.group(1))
                if lf_orig != da_orig_found:
                    print(f"warning: original LF ({lf_orig}) != counted DA entries ({da_orig_found}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

        # LH
        m_lh = LH_RE.match(line)
        assert (m_lh is not None) == line.lstrip().startswith('LH:')
        if m_lh:
            leading = re.match(r'^(\s*)', line).group(1)
            new_lines[i] = f"{leading}LH:{da_new_hit}\n"
            try:
                lh_orig = int(m_lh.group(1))
                if lh_orig != da_orig_hit:
                    print(f"warning: original LH ({lh_orig}) != counted DA hits ({da_orig_hit}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

        # BRF
        m_brf = BRF_RE.match(line)
        assert (m_brf is not None) == line.lstrip().startswith('BRF:')
        if m_brf:
            leading = re.match(r'^(\s*)', line).group(1)
            # replace with recomputed branch-found (if you computed brda_new_found above)
            new_lines[i] = f"{leading}BRF:{brda_new_found}\n"
            try:
                brf_orig = int(m_brf.group(1))
                if brf_orig != brda_orig_found:
                    print(f"warning: original BRF ({brf_orig}) != counted BRDA entries ({brda_orig_found}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

        # BRH
        m_brh = BRH_RE.match(line)
        assert (m_brh is not None) == line.lstrip().startswith('BRH:')
        if m_brh:
            leading = re.match(r'^(\s*)', line).group(1)
            new_lines[i] = f"{leading}BRH:{brda_new_hit}\n"
            try:
                brh_orig = int(m_brh.group(1))
                if brh_orig != brda_orig_hit:
                    print(f"warning: original BRH ({brh_orig}) != counted BRDA hits ({brda_orig_hit}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

        # FNF
        m_fnf = FNF_RE.match(line)
        assert (m_fnf is not None) == line.lstrip().startswith('FNF:')
        if m_fnf:
            leading = re.match(r'^(\s*)', line).group(1)
            new_lines[i] = f"{leading}FNF:{fnda_new_found}\n"
            try:
                fnf_orig = int(m_fnf.group(1))
                if fnf_orig != fnda_orig_found:
                    print(f"warning: original FNF ({fnf_orig}) != counted FNDA entries ({fnda_orig_found}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

        # FNH
        m_fnh = FNH_RE.match(line)
        assert (m_fnh is not None) == line.lstrip().startswith('FNH:')
        if m_fnh:
            leading = re.match(r'^(\s*)', line).group(1)
            new_lines[i] = f"{leading}FNH:{fnda_new_hit}\n"
            try:
                fnh_orig = int(m_fnh.group(1))
                if fnh_orig != fnda_orig_hit:
                    print(f"warning: original FNH ({fnh_orig}) != counted FNDA hits ({fnda_orig_hit}) for {sf_path}", file=sys.stderr)
            except Exception:
                pass
            continue

    return new_lines

def filter_lcov(in_fh, out_fh):
    lines = in_fh.readlines()

    i = 0
    out_lines = []
    while i < len(lines):
        line = lines[i]
        if line.lstrip().startswith('SF:'):
            # buffer block until end_of_record
            block = []
            while i < len(lines):
                block.append(lines[i])
                if lines[i].strip() == 'end_of_record':
                    i += 1
                    break
                i += 1
            processed = process_block(block)
            out_lines.extend(processed)
        else:
            out_lines.append(line)
            i += 1

    out_fh.writelines(out_lines)

if __name__ == "__main__":
    if len(sys.argv) > 3:
        print("Usage: python3 FilterCoverage.py [infile [outfile]]", file=sys.stderr)
        sys.exit(1)
    if len(sys.argv) > 2:
        with open(sys.argv[2], "w", encoding="utf-8") as out_fh:
            with open(sys.argv[1], "r", encoding="utf-8") as in_fh:
                filter_lcov(in_fh, out_fh)
    elif len(sys.argv) > 1:
        with open(sys.argv[1], "r", encoding="utf-8") as fh:
            filter_lcov(fh, sys.stdout)
    else:
        filter_lcov(sys.stdin, sys.stdout)
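A short usage sketch of the filter (the file names below are illustrative, not mandated by the script):

    # Read an LCOV trace, drop records for excluded lines, write the filtered trace.
    # With no arguments the script filters stdin to stdout.
    python3 maint/FilterCoverage.py build/coverage-lcov.info build/coverage-lcov.filtered.info

    # If, say, line 42 of a source file carries an LCOV_EXCL_LINE comment, its
    # DA:42,... and BRDA:42,... records are removed, FN/FNDA records for functions
    # that start on excluded lines are removed, and the LF/LH, BRF/BRH and FNF/FNH
    # totals are recomputed to match what remains.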

maint/README

Lines changed: 7 additions & 0 deletions
@@ -60,6 +60,9 @@ GenerateUcpTables.py
   GenerateCommon.py and Unicode data files. The generated file contains tables
   for looking up Unicode property names.

+FilterCoverage.py
+  A small helper used by the RunCoverage script.
+
 LintMan
   A Perl script to check and update magic numbers in the documentation that
   correspond to configurable settings in the codebase.
@@ -95,6 +98,10 @@ pcre2_chartables.c.non-standard
 README
   This file.

+RunCoverage
+  A script used to generate the coverage report using Clang. It is called by
+  the GitHub CI actions, and can also be run by a developer locally.
+
 RunManifestTest
 RunManifestTest.ps1
   Scripts to generate and verify a list of files against an expected 'manifest'
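For local use, the flow implied by the CI job would be roughly the following (a sketch; the coverage-enabling build flags are not shown in this excerpt):

    # From a Clang coverage-instrumented build tree:
    cd build
    ../maint/RunCoverage
    # HTML output is expected in ./coverage-html, the directory uploaded as the CI artifact.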
