Skip to content

Fix leaks #6241

@Rot127

Description

@Rot127

There are roughly 120k leaks reported when the whole test suite is run.
We will probably not fix those in our lifetime, but why not try.

The New Leaks workflow uploads all lsan logs as artifacts once done.

Here is a small vibe-coded script that reads the stack traces and counts how often each filename + line number pair occurs across all collected LSAN stack traces:

#!/usr/bin/env python3
import argparse
import os
import re
from collections import Counter

def parse_lsan_directory(directory_path: str, stack_start: int, stack_end: int) -> Counter:
    """
    Count the source locations that appear in LSAN stack traces.

    Every regular file directly inside ``directory_path`` (sub-directories are
    not descended into) is scanned line by line for symbolized frames such as::

        #1 0x55d5c8a3b1c2 in rz_str_new ../librz/util/str.c:1112
        #1 0x55d5c8a3b1c2 in rz_str_new ../librz/util/str.c:1112:15

    Frames without a ``file:line`` suffix (e.g. ones that only name a shared
    object) are ignored.

    Args:
        directory_path: Directory containing the LSAN log files.
        stack_start: Lowest frame index (the number after ``#``) to count,
            inclusive.
        stack_end: Frame index at which counting stops, exclusive.

    Returns:
        A Counter keyed by ``(file_path, line_number)`` tuples. It is empty
        if the directory does not exist or no frame matched.
    """
    # The path is matched lazily and an optional trailing ":<column>" is
    # consumed separately. A greedy path would swallow the line number of a
    # "file:line:col" frame and report the column as the line.
    # The final (?!\S) requires the location to end at whitespace/end of line.
    stack_pattern = re.compile(
        r'#(?P<idx>\d+)\s+0x[0-9a-f]+\s+in\s+.*?\s+'
        r'(?P<filepath>\S+?):(?P<lineno>\d+)(?::\d+)?(?!\S)'
    )
    leak_sources = Counter()

    if not os.path.isdir(directory_path):
        print(f"Error: Directory '{directory_path}' not found.")
        return leak_sources

    # Sort the listing so that the insertion order of the Counter (and thus
    # the order of equally frequent entries) does not depend on the filesystem.
    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)

        if not os.path.isfile(file_path):
            continue

        try:
            # Undecodable bytes are dropped instead of aborting the file.
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                # A stack frame never spans lines, so scan one line at a time.
                # This keeps memory flat for large logs and stops the \s+
                # parts of the pattern from matching across a newline.
                for line in f:
                    for match in stack_pattern.finditer(line):
                        idx = int(match.group('idx'))
                        if stack_start <= idx < stack_end:
                            location = (
                                match.group('filepath'),
                                int(match.group('lineno')),
                            )
                            leak_sources[location] += 1
        except OSError as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Could not read file {filename}: {e}")

    return leak_sources

def main():
    """
    Command-line entry point.

    Parses the arguments, counts the leak source locations found in the given
    directory via ``parse_lsan_directory`` and prints the most frequent ones
    as a table with the columns count, file and line.
    """
    parser = argparse.ArgumentParser(
        description="Parse LSAN stack traces and identify the most common leak source locations."
    )

    parser.add_argument(
        "directory",
        help="Path to the directory containing LSAN trace files."
    )

    parser.add_argument(
        "-s", "--stackstart",
        type=int,
        default=1,
        help="Depth of stack trace to start collection."
    )

    parser.add_argument(
        "-e", "--stackend",
        type=int,
        default=4,
        help="Depth of stack trace to stop collection (exclusive)"
    )

    parser.add_argument(
        "-n", "--top",
        type=int,
        default=10,
        help="Number of top leak sources to display (default: 10)."
    )

    args = parser.parse_args()

    sources = parse_lsan_directory(args.directory, args.stackstart, args.stackend)

    if not sources:
        print("No leak sources found or directory is empty.")
        return

    # Clamp to zero: a negative value used as a slice bound would silently
    # drop entries from the end of the list instead of limiting the output.
    limit = max(args.top, 0)

    print(f"\n{'Count':<8} | {'File':<50} | {'Line':<5}")
    print("-" * 70)

    # most_common(n) returns at most n entries, most frequent first.
    for (filepath, lineno), count in sources.most_common(limit):
        print(f"{count:<8} | {filepath:<50} | {lineno:<5}")

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

Depending on which frames of the stack traces are considered, we get a different picture. Still, it is clear that a lot of the leaks come from cmd objects.

./top_leaks.py -n 30 -s 2 ~/Downloads/lsan_logs/tmp/lsan_logs/

Count    | File                                               | Line 
----------------------------------------------------------------------
24558    | ../binrz/rizin/rizin.c                             | 57   
14144    | ../librz/bin/bin.c                                 | 354  
14056    | ../librz/bin/bin.c                                 | 295  
13824    | ../librz/bin/bfile.c                               | 180  
12569    | ../librz/main/rizin.c                              | 1177 
11875    | ../librz/bin/bobj.c                                | 532  
11871    | ../librz/core/cfile.c                              | 1014 
11850    | ../librz/core/cmd/cmd.c                            | 2773 
11799    | ../librz/core/cfile.c                              | 782  
9539     | ../librz/core/cmd/cmd_api.c                        | 799  
9409     | ../librz/core/cmd/cmd_api.c                        | 812  
9335     | ../librz/core/cmd/cmd.c                            | 1169 
9250     | ../librz/core/cmd/cmd.c                            | 1117 
8766     | ../librz/util/str.c                                | 1112 
8649     | ../librz/core/cmd/cmd.c                            | 2830 
8377     | ../librz/core/cmd/cmd.c                            | 2795 
7758     | ../librz/core/cmd/cmd.c                            | 2945 
6708     | ../librz/core/cmd/cmd.c                            | 2973 
6684     | ../binrz/rz-bin/rz-bin.c                           | 8    
6547     | ../librz/util/sdb/src/sdb.c                        | 574  
6446     | ../librz/util/sdb/src/sdb.c                        | 559  
6261     | ../librz/main/rizin.c                              | 337  
6217     | ../librz/bin/bobj.c                                | 512  
6215     | ../librz/arch/pdb_process.c                        | 793  
5872     | ../librz/arch/pdb_process.c                        | 752  
5869     | ../librz/main/rizin.c                              | 1430 
5637     | ../librz/core/cmd/cmd_api.c                        | 743  
5610     | ../librz/arch/pdb_process.c                        | 118  
5542     | ../librz/arch/pdb_process.c                        | 742  
5372     | ../librz/arch/pdb_process.c                        | 818  

Metadata

Metadata

Assignees

No one assigned

    Labels

    cmd-api (Switch from cmd calls to API), performance (A performance problem/enhancement)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions