diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml new file mode 100644 index 00000000..6d033257 --- /dev/null +++ b/.github/workflows/python-tests.yml @@ -0,0 +1,30 @@ +name: Python Tests + +on: + push: + paths: + - 'src/python/**' + - '.github/workflows/python-tests.yml' + pull_request: + paths: + - 'src/python/**' + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.10', '3.12'] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install package with test dependencies + run: | + cd src/python/futag-package + pip install -e ".[test]" + - name: Run tests + run: | + cd src/python/futag-package + python -m pytest tests/ -v --tb=short diff --git a/.github/workflows/syntax-check.yml b/.github/workflows/syntax-check.yml new file mode 100644 index 00000000..8beea1f3 --- /dev/null +++ b/.github/workflows/syntax-check.yml @@ -0,0 +1,27 @@ +name: Syntax Check + +on: [push, pull_request] + +jobs: + python-syntax: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.12' + - name: Check Python syntax + run: | + python -c " + import ast, sys, pathlib + errors = [] + for f in pathlib.Path('src/python/futag-package/src/futag').glob('*.py'): + try: + ast.parse(f.read_text()) + except SyntaxError as e: + errors.append(f'{f}: {e}') + if errors: + for e in errors: print(e) + sys.exit(1) + print('All Python files: syntax OK') + " diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b5dca91..a46904ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,48 +1,90 @@ # Change Log All notable changes to this project will be documented in this file. - + The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 
+## [3.0.1] - 2025 + +### Major Refactoring +- Extracted `BaseGenerator` ABC from 5 duplicated generator classes (12,924 → 4,767 lines) +- Created `GeneratorState` dataclass replacing 13 mutable instance variables +- Extracted `BaseFuzzer` from duplicated Fuzzer/NatchFuzzer (1,602 → 891 lines) +- Moved `ContextGenerator` and `NatchGenerator` to separate modules +- `FuzzDataProviderGenerator` reduced from 2,715 to 222 lines +- `BlobStamperGenerator` reduced from 2,693 to 37 lines + +### New Features +- Custom exception hierarchy (`futag.exceptions`: FutagError, InvalidPathError, etc.) +- Python `logging` module integration (replaces print statements) +- `GeneratorState.save()`/`restore_from()` for clean recursive backtracking +- GitHub Actions CI (python-tests.yml, syntax-check.yml) + +### Bug Fixes +- Fixed null-pointer dereference in FutagConsumerAnalyzer (cfg->size() before null check) +- Fixed memory leak in FutagConsumerAnalyzer (new int() instead of new int[]) +- Fixed `param_list` duplication bug in `__save_old_values` +- Fixed 14+ file handle leaks (bare open() → with statements) +- Fixed `_build_ovearall_coverage` typo → `_build_overall_coverage` +- Replaced bubble sort in `sort_callexprs` with `sorted()` + +### Documentation +- Created docs/architecture.md, docs/generators.md, docs/checkers.md, docs/python-api.md +- Created CONTRIBUTING.md +- Added comprehensive docstrings and return type hints to all methods +- Added GPL v3 license headers to all Python source files +- Translated template-script.py comments from Russian to English + +### C++ Checker Improvements +- Added `MAX_CFG_BLOCKS` and `REPORT_FILENAME_RAND_LEN` constants +- Changed `SmallString<0>` to `SmallString<256>` +- Added Doxygen comments to all checker methods +- Synchronized base files with LLVM 18 variants + ## 20250824 - Add support for Fuzzed Data Provider -## 20220716 -- Add modules preprocessor to Futag python-package -- Fix README of Futag python-package +## 20230807 +- Optimize 
ConsumerBuilder +- Add example for context-generation https://github.com/thientc/Futag-tests/tree/main/json-c-contexts -## 20220727 -- Add custom-llvm: download and build llvm, clang, compiler-rt -- Fix document +## 20230711 +- Support generation fuzz driver for Natch data: https://github.com/thientc/Futag-tests/tree/main/Natch -## 20220801 -- Add multi-processing support for compiling -- TODO: Check analysis result befor generating fuzz-driver +## 20230522 +- Fix error in generator +- Add generation for pugi::char_t *& -## 20220808 -- Fix bug in generator -- Fix for svace analysing -- add first version of fuzzer and result of Fuzzing for Svace +## 20230417 +- Add generation for anonymous function +- Fix error in Builder -## 20220811 -- Fix bug in generator -- Add pre release package -- Fix document +## 20230320 +- Support for context generation -## 20220821 -- Fix bug in generator -- Add release package -- Fix document +## 20230305 +- Fix error python in Builder +- Fix error python in Generator for wchar_t string -## 20220911 -- Add support for fuzz-introspector -- Migrate to llvm-14.0.6 +## 20230214 +- Add is_simple for 4consummer_analysis_db +- Add CFG and DFC analysis +- Add Fuzzer extra params support -## 20220921 -- Add support for Makefile -- Generation for global function of C++ libraries -- Add testing repository: https://github.com/thientc/Futag-tests +## 20221220 +- Fix errors while compiling AFL++, return coverage parameters +- Fix Readme +- change LLVM_ENABLE_ZLIB to ON + +## 20221107 +- Add generation for anonymous functions +- Reformat Python classes +- Fix included paths of compiling command + +## 20221018 +- Add support for C++, generate for constructors and for method of class, which has default constructors +- Tested on FreeImage and Pugixml ## 20221012 - Add support for AFLplusplus @@ -51,43 +93,38 @@ and this project adheres to [Semantic Versioning](http://semver.org/). 
- Add compilition database of building - Add analysis of headers -## 20221018 -- Add support for C++, generate for constructors and for method of class, which has default constructors -- Tested on FreeImage and Pugixml - -## 20221107 -- And generation for anonymous functions -- Reformat Python classes -- Fix included paths of compiling command - -## 20221220 -- Fix errors while compiling AFL++, return coverage parameters -- Fix Readme -- change LLVM_ENABLE_ZLIB to ON +## 20220921 +- Add support for Makefile +- Generation for global function of C++ libraries +- Add testing repository: https://github.com/thientc/Futag-tests -## 20230214 -- Add is_simple for 4consummer_analysis_db -- Add CFG and DFC analysis -- Add Fuzzer extra params support +## 20220911 +- Add support for fuzz-introspector +- Migrate to llvm-14.0.6 -## 20230320 -- Support for context generation +## 20220821 +- Fix bug in generator +- Add release package +- Fix document -## 20230305 -- Fix error python in Builder -- Fix error python in Generator for wchar_t string +## 20220811 +- Fix bug in generator +- Add pre release package +- Fix document -## 20230417 -- Add generation for anonymous function -- Fix error in Builder +## 20220808 +- Fix bug in generator +- Fix for svace analysing +- add first version of fuzzer and result of Fuzzing for Svace -## 20230522 -- Fix error in generator -- Add generation for pugi::char_t *& +## 20220801 +- Add multi-processing support for compiling +- TODO: Check analysis result before generating fuzz-driver -## 20230711 -- Support generation fuzz driver for Natch data: https://github.com/thientc/Futag-tests/tree/main/Natch +## 20220727 +- Add custom-llvm: download and build llvm, clang, compiler-rt +- Fix document -## 20230807 -- Optimize ConsumerBuilder -- Add example for context-generation https://github.com/thientc/Futag-tests/tree/main/json-c-contexts \ No newline at end of file +## 20220716 +- Add modules preprocessor to Futag python-package +- Fix README of Futag 
python-package diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..130d8ffd --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,66 @@ +# Contributing to Futag + +## Development Setup + +### Prerequisites +- CMake >= 3.13.4 +- GCC >= 7.1.0 +- Python >= 3.8 +- pip >= 22.1.1 + +### Building the LLVM toolchain + +```bash +cd custom-llvm && ./prepare.sh +cd ../build && ./build.sh +``` + +### Installing the Python package (development mode) + +```bash +cd src/python/futag-package +pip install -e ".[test]" +``` + +### Running tests + +```bash +cd src/python/futag-package +python -m pytest tests/ -v +``` + +## Code Style + +### Python +- Follow PEP 8 +- Use Google-style docstrings +- All new methods must have type hints and docstrings +- Use `logging` module instead of `print()` +- Use `with` statements for file I/O +- Raise exceptions from `futag.exceptions` instead of `sys.exit()` + +### C++ +- Follow LLVM coding style (configured in .clang-format) +- Column limit: 80 characters +- Indent: 4 spaces + +## Pull Request Process + +1. Create a feature branch from `main` +2. Make your changes with clear commit messages +3. Ensure all Python tests pass +4. Update documentation if APIs change +5. Submit PR with description of changes + +## Adding a New Generator Backend + +See docs/generators.md for the BaseGenerator pattern. Implement 10 abstract `_gen_*` methods. + +## LLVM Version Support + +When adding support for a new LLVM version: +1. Create version-specific source files (e.g., FutagAnalyzer19.cpp) +2. Create version-specific CMakeLists (e.g., CMakeLists19.txt) +3. Update Checkers.td with version-specific copy +4. Update build/build.sh version detection +5. 
The base file should always match the latest supported LLVM version diff --git a/How-to-work-with-Futag.md b/How-to-work-with-Futag.md index d22888c5..75038c30 100644 --- a/How-to-work-with-Futag.md +++ b/How-to-work-with-Futag.md @@ -164,4 +164,59 @@ json_file="/path/to/analysis/folder/futag-analysis-result.json" #Путь к ф g.gen_targets() # генерация фаззинг-оберток g.compile_targets( # компиляция фаззинг-оберток True, # генерация Makefile - 16) # количество потоков при компиляции \ No newline at end of file + 16) # количество потоков при компиляции + +## Генераторы фаззинг-оберток + +Futag поддерживает несколько вариантов генерации фаззинг-оберток. Все генераторы наследуют от базового класса `BaseGenerator`. + +### Generator (стандартный) +Использует `memcpy()` для чтения данных из буфера. Поддерживает C и C++. + +```python +from futag.generator import Generator +generator = Generator(futag_llvm_path, library_root) +generator.gen_targets(max_wrappers=10) +generator.compile_targets(workers=4) +``` + +### FuzzDataProviderGenerator +Использует API `FuzzedDataProvider` из libFuzzer для типобезопасного чтения данных. Только C++. + +```python +from futag.fdp_generator import FuzzDataProviderGenerator +generator = FuzzDataProviderGenerator(futag_llvm_path, library_root) +generator.gen_targets(max_wrappers=100) +generator.compile_targets(workers=4, keep_failed=True) +``` + +### ContextGenerator +Генерирует обёртки на основе контекстов использования библиотеки в потребительских программах. + +```python +from futag.generator import ContextGenerator +ctx_gen = ContextGenerator(futag_llvm_path, library_root) +ctx_gen.gen_context() +ctx_gen.compile_targets(keep_failed=True) +``` + +## Управление логированием + +Futag использует модуль `logging` для вывода информации. 
Настройка уровня логирования (выберите один из вариантов — эффект имеет только первый вызов `basicConfig`): + +```python +import logging +logging.basicConfig(level=logging.INFO) # Стандартный вывод +logging.basicConfig(level=logging.DEBUG) # Подробный вывод +logging.basicConfig(level=logging.WARNING) # Тихий режим +``` + +## Обработка ошибок + +Модуль `futag.exceptions` предоставляет иерархию исключений: +- `FutagError` — базовое исключение +- `InvalidPathError` — неверный путь к файлу или директории +- `InvalidConfigError` — неверная конфигурация +- `BuildError` — ошибка сборки библиотеки +- `GenerationError` — ошибка генерации фаззинг-оберток +- `AnalysisError` — ошибка анализа результатов \ No newline at end of file diff --git a/README.en.md b/README.en.md index b6f1f312..4324b159 100644 --- a/README.en.md +++ b/README.en.md @@ -56,7 +56,7 @@ Thank you for acknowledging the authors' work when you use FUTAG or report bugs You can try building FUTAG using the provided Dockerfiles for Ubuntu: https://github.com/ispras/Futag/tree/main/product-tests/build-test ## 3.2. Using a prepackaged release -- Download the latest release (for example, futag-llvm.2.1.1.tar.xz) from https://github.com/ispras/Futag/releases/tag/2.1.1 and extract it. The tool will be installed to the futag-llvm directory. +- Download the latest release (for example, futag-llvm.3.0.1.tar.xz) from https://github.com/ispras/Futag/releases/tag/v3.0.1 and extract it. The tool will be installed to the futag-llvm directory. - To build AFLplusplus, run the buildAFLplusplus.sh script in futag-llvm: ```bash @@ -119,7 +119,7 @@ This script creates the Futag/build directory and copies Futag/custom-llvm/build - Make sure the futag-.tar.gz package is installed under futag-llvm/python-package/: ```bash ~$ pip install -r futag-llvm/python-package/requirements.txt - ~$ pip install futag-llvm/python-package/futag-2.1.1.tar.gz + ~$ pip install futag-llvm/python-package/futag-3.0.1.tar.gz ``` ### 4.1. 
Automatic generation of fuzzing wrappers when usage contexts are absent @@ -213,6 +213,14 @@ A template for run scripts can be found here: https://github.com/ispras/Futag/bl A test repository was created at https://github.com/thientc/Futag-tests to test FUTAG on various libraries (json-c, php, FreeImage, etc.). You can try testing using the Docker container at https://github.com/ispras/Futag/tree/main/product-tests/libraries-test. +## Documentation + +- [Architecture Overview](docs/architecture.md) — three-layer design, data flow +- [C/C++ Checkers](docs/checkers.md) — FutagAnalyzer and FutagConsumerAnalyzer +- [Generator System](docs/generators.md) — BaseGenerator pattern and backends +- [Python API Reference](docs/python-api.md) — complete class and method reference +- [Contributing Guide](CONTRIBUTING.md) — development setup and code style + ## 5. Authors - [Tran Chi Thien](https://github.com/thientc/) diff --git a/README.md b/README.md index 427fd465..10f41d49 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ FUTAG использует, в качестве внешнего интерф Вы можете попробовать собрать Futag с готовыми [Докер-файлами](https://github.com/ispras/Futag/tree/main/product-tests/build-test) для ОС Ubuntu. ## 2.2. Использование предварительно упакованного пакета -- Загрузите последнюю версию [futag-llvm.2.1.1.tar.xz](https://github.com/ispras/Futag/releases/tag/2.1.1) и разархивируйте. В результате инструмент будет установлен в директорию futag-llvm. +- Загрузите последнюю версию [futag-llvm.3.0.1.tar.xz](https://github.com/ispras/Futag/releases/tag/v3.0.1) и разархивируйте. В результате инструмент будет установлен в директорию futag-llvm. 
- Для сборки AFLplusplus запустите скрипт buildAFLplusplus.sh в futag-llvm ```bash @@ -105,7 +105,7 @@ FUTAG использует, в качестве внешнего интерф - Предварительно должен быть установлен пакет futag-<версия>.tar.gz по пути futag-llvm/python-package/: ```bash ~$ pip install -r futag-llvm/python-package/requirements.txt - ~$ pip install futag-llvm/python-package/futag-2.1.1.tar.gz + ~$ pip install futag-llvm/python-package/futag-3.0.1.tar.gz ``` ## 3.1. Автоматическая генерация фаззинг-оберток в условии отсутствия контекстов использования @@ -198,6 +198,14 @@ context_generator.compile_targets( #компиляция сгенерирова Был создан [репозиторий](https://github.com/thientc/Futag-tests) для тестирования Futag над библиотеками (json-c, php, FreeImage, и т.д.), можете протестировать с [Докер-контейнером](https://github.com/ispras/Futag/tree/main/product-tests/libraries-test). +## Документация + +- [Архитектура проекта](docs/architecture.md) — трёхуровневая архитектура, поток данных +- [С/C++ анализаторы](docs/checkers.md) — FutagAnalyzer и FutagConsumerAnalyzer +- [Система генераторов](docs/generators.md) — паттерн BaseGenerator и бэкенды +- [Справочник Python API](docs/python-api.md) — полное описание классов и методов +- [Руководство для разработчиков](CONTRIBUTING.md) — настройка среды и стиль кода + # 4. 
Авторы - [Чан Ти Тхиен](https://github.com/thientc/) diff --git a/References.md b/References.md index ae44f047..631a4787 100644 --- a/References.md +++ b/References.md @@ -23,4 +23,15 @@ https://stackoverflow.com/questions/49877836/how-can-i-get-macro-name-from-cstyl https://stackoverflow.com/questions/58620022/how-to-get-the-macro-and-its-expansion-with-clang-libtooling - [LLVM Loop Terminology](https://llvm.org/docs/LoopTerminology.html) -- [CFGBlock](https://clang.llvm.org/doxygen/classclang_1_1CFGBlock.html) \ No newline at end of file +- [CFGBlock](https://clang.llvm.org/doxygen/classclang_1_1CFGBlock.html) + +## Futag Internal Documentation +- [Architecture Overview](docs/architecture.md) +- [C++ Checker Documentation](docs/checkers.md) +- [Generator System Documentation](docs/generators.md) +- [Python API Reference](docs/python-api.md) + +## Fuzzing Libraries +- [FuzzedDataProvider](https://llvm.org/docs/LibFuzzer.html#fuzzed-data-provider) — libFuzzer utility for type-safe data consumption +- [AFL++](https://aflplus.plus/) — American Fuzzy Lop plus plus +- [nlohmann/json](https://github.com/nlohmann/json) — JSON for Modern C++ (used in Futag checkers) \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 00000000..e34a5930 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,186 @@ +# Futag Architecture Overview + +This document describes the architecture of FUTAG (Fuzz target Automated Generator), +a tool from ISP RAS for automated generation of fuzzing wrappers (fuzz targets) for +C/C++ software libraries. Futag analyzes library source code via custom Clang/LLVM +static analysis checkers and generates fuzz targets in LibFuzzer or AFLplusplus format. 
+ +## Three-Layer Architecture + +``` ++---------------------------------------------------------------+ +| Layer 3: Build Infrastructure | +| custom-llvm/prepare.sh --> build/build.sh | +| (download LLVM sources, patch Futag checkers, compile) | ++---------------------------------------------------------------+ + | + futag-llvm/ toolchain + | ++---------------------------------------------------------------+ +| Layer 1: C++ Clang Checkers | +| src/Checkers/ src/clang/ | +| (StaticAnalyzer plugins: extract function signatures, | +| types, and usage patterns from library source code) | ++---------------------------------------------------------------+ + | + JSON analysis data + | ++---------------------------------------------------------------+ +| Layer 2: Python Orchestration | +| src/python/futag-package/src/futag/ | +| | +| preprocessor.py --> generator.py --> fuzzer.py | +| (build & analyze) (gen targets) (run fuzzing) | ++---------------------------------------------------------------+ +``` + +### Layer 1: C++ Clang Checkers + +Located in `src/Checkers/` and `src/clang/`, these are static analysis plugins that +run inside Clang's StaticAnalyzer framework. They extract function signatures, type +information, and usage patterns from the target library's source code and serialize +the results as JSON. + +For detailed documentation, see [docs/checkers.md](checkers.md). 
+ +### Layer 2: Python Orchestration + +Located in `src/python/futag-package/src/futag/`, this layer provides the user-facing +Python API that drives the full pipeline: + +- **preprocessor.py** -- `Builder` builds and analyzes target libraries; + `ConsumerBuilder` handles library+consumer pairs +- **generator.py** -- `Generator` produces fuzz targets from analysis JSON; + `ContextGenerator` uses consumer usage contexts +- **fuzzer.py** -- `BaseFuzzer`, `Fuzzer`, and `NatchFuzzer` execute generated + targets with configurable timeouts, memory limits, and sanitizers +- **sysmsg.py** -- Constants and error messages (LIBFUZZER, AFLPLUSPLUS engine + identifiers, paths) + +For detailed documentation, see [docs/generators.md](generators.md) and +[docs/python-api.md](python-api.md). + +### Layer 3: Build Infrastructure + +Located in `custom-llvm/` and `build/`, shell scripts that: + +1. Download LLVM sources (`custom-llvm/prepare.sh`) +2. Patch in Futag's checkers and Clang modifications +3. Build the complete toolchain via CMake (`build/build.sh`) +4. 
Optionally build AFLplusplus support (`futag-llvm/buildAFLplusplus.sh`) + +## Data Flow + +``` + Library Futag LLVM Python Package + Source Code Toolchain (futag) + | | | + v v | + +----------+ +-------------+ | + | .c / .h |--->| scan-build | | + | files | | (checkers) | | + +----------+ +------+------+ | + | | + JSON analysis | + data files | + | v + | +-------------------+ + +----------->| preprocessor.py | + | Builder.analyze() | + +--------+----------+ + | + futag-analysis-result.json + | + v + +-------------------+ + | generator.py | + | gen_targets() | + +--------+----------+ + | + .c / .cpp fuzz targets + | + v + +-------------------+ + | generator.py | + | compile_targets() | + +--------+----------+ + | + instrumented binaries + | + v + +-------------------+ + | fuzzer.py | + | Fuzzer.fuzz() | + +--------+----------+ + | + v + crashes / coverage +``` + +## Key Components + +| Component | Location | Documentation | +|-----------|----------|---------------| +| Clang Checkers | `src/Checkers/`, `src/clang/` | [docs/checkers.md](checkers.md) | +| Generator Classes | `src/python/futag-package/src/futag/` | [docs/generators.md](generators.md) | +| Python API | `src/python/futag-package/` | [docs/python-api.md](python-api.md) | +| Build Scripts | `custom-llvm/`, `build/` | [README.en.md](../README.en.md) | + +## Directory Structure + +``` +Futag/ + build/ # Build scripts for compiling the LLVM toolchain + custom-llvm/ # Scripts to download and patch LLVM sources + docs/ # Detailed documentation + checkers.md # Clang checker documentation + generators.md # Generator class documentation + python-api.md # Python API reference + examples/ # Example scripts and configurations + product-tests/ # Dockerized tests + build-test/ # Build validation (Ubuntu 20.04/22.04/24.04, Alt 11/12) + libraries-test/ # End-to-end tests against real libraries + package-test/ # Python package tests + src/ + Checkers/ # C++ Clang StaticAnalyzer checker sources + clang/ # Clang 
modifications for Futag + python/ + futag-package/ # Python package (pip-installable) + src/futag/ # Core Python modules + vendors/ + json/ # nlohmann/json (C++ JSON library for checkers) + workshop/ # Library-specific tutorials +``` + +## LLVM Version Support + +Futag supports multiple LLVM versions: **14**, **18**, and **19**. + +The project maintains version-specific copies of source files using a naming +convention with version suffixes: + +| Base File | LLVM 14 | LLVM 18 | +|-----------|---------|---------| +| `FutagAnalyzer.cpp` | `FutagAnalyzer14.cpp` | `FutagAnalyzer18.cpp` | +| `ASTMatchFinder.cpp` | `ASTMatchFinder14.cpp` | `ASTMatchFinder18.cpp` | +| `CMakeLists.txt` | `CMakeLists14.txt` | `CMakeLists18.txt` | +| `Checkers.td` | `Checkers14.td` | `Checkers18.td` | + +The build script (`build/build.sh`) detects or accepts the target LLVM version and +selects the correct version-specific files during compilation. + +When adding support for a new LLVM version (e.g., 19): + +1. Create version-specific source files (e.g., `FutagAnalyzer19.cpp`) +2. Create a version-specific `CMakeLists19.txt` +3. Update `Checkers.td` with a version-specific copy +4. Update `build/build.sh` version detection logic + +The base (unsuffixed) file should always match the latest supported LLVM version. + +## Getting Started + +For build instructions and setup, see [README.en.md](../README.en.md). + +For a complete workflow example, see `src/python/template-script.py` and the +`workshop/` directory for library-specific tutorials. 
diff --git a/docs/python-api.md b/docs/python-api.md new file mode 100644 index 00000000..20bd00af --- /dev/null +++ b/docs/python-api.md @@ -0,0 +1,323 @@ +# Futag Python API Reference + +## Quick Start + +```python +from futag.preprocessor import Builder +from futag.generator import Generator + +# Step 1: Build and analyze the library +builder = Builder("/path/to/futag-llvm", "/path/to/library", clean=True) +builder.auto_build() +builder.analyze() + +# Step 2: Generate fuzz targets +generator = Generator("/path/to/futag-llvm", "/path/to/library") +generator.gen_targets(anonymous=False, max_wrappers=10) +generator.compile_targets(workers=4, keep_failed=True) +``` + +## Module Overview + +| Module | Classes | Purpose | +|--------|---------|---------| +| `futag.preprocessor` | `Builder`, `ConsumerBuilder` | Build & analyze libraries | +| `futag.generator` | `Generator`, `ContextGenerator`, `NatchGenerator` | Generate fuzz targets | +| `futag.fdp_generator` | `FuzzDataProviderGenerator` | FDP-based fuzz targets | +| `futag.blob_stamper_generator` | `BlobStamperGenerator` | BlobStamper-based targets | +| `futag.fuzzer` | `Fuzzer`, `NatchFuzzer` | Execute fuzz targets | +| `futag.base_generator` | `BaseGenerator` (ABC) | Shared generator infrastructure | +| `futag.generator_state` | `GeneratorState` | State management dataclass | +| `futag.sysmsg` | (constants) | Constants and error messages | + +--- + +## Preprocessor Module + +### Builder + +Builds and analyzes a target library using the Futag-patched Clang toolchain. 
+ +```python +from futag.preprocessor import Builder + +builder = Builder( + futag_llvm_package="/path/to/futag-llvm", # Required: path to compiled toolchain + library_root="/path/to/library", # Required: path to library source + flags="-g -O0", # Compiler flags (default: debug + sanitizer + coverage) + clean=False, # Delete futag dirs before starting + intercept=True, # Use intercept-build for compile_commands.json + build_path=".futag-build", # Build directory name + install_path=".futag-install", # Install directory name + analysis_path=".futag-analysis", # Analysis output directory name + processes=4, # Parallel build workers + build_ex_params="" # Extra build params (e.g., "--with-openssl") +) + +builder.auto_build() # Auto-detect build system (configure/cmake/makefile/meson) +builder.analyze() # Aggregate checker JSON into futag-analysis-result.json +``` + +**Build methods:** `build_cmake()`, `build_configure()`, `build_makefile()`, `build_meson()` + +**Output:** `.futag-analysis/futag-analysis-result.json` + +### ConsumerBuilder + +Analyzes a consumer program to extract library usage contexts. + +```python +from futag.preprocessor import ConsumerBuilder + +consumer_builder = ConsumerBuilder( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", + consumer_root="/path/to/consumer", # Required: consumer program source + clean=False, + processes=4, +) + +consumer_builder.auto_build() +consumer_builder.analyze() # Outputs .futag-consumer/futag-contexts.json +``` + +--- + +## Generator Module + +### Generator (Standard) + +Generates fuzz targets using raw `memcpy()` buffer consumption. Supports both C and C++. 
+ +```python +from futag.generator import Generator + +generator = Generator( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", + alter_compiler="", # Override compiler path + target_type=0, # 0=LIBFUZZER, 1=AFLPLUSPLUS + json_file=".futag-analysis/futag-analysis-result.json", + output_path="futag-fuzz-drivers", # Output directory + build_path=".futag-build", + install_path=".futag-install", + delimiter=".", # Separator in variant directory names + exclude_headers=None, # List of headers to exclude +) + +generator.gen_targets( + anonymous=False, # Generate for anonymous namespace functions + from_list="", # JSON file with function name filter list + max_wrappers=10, # Max variants per function + max_functions=10000, # Stop after N functions +) + +generator.compile_targets( + workers=4, # Parallel compilation workers + keep_failed=False, # Keep failed compilation logs + extra_params="", # Extra compiler parameters + extra_include="", # Extra include directories + extra_dynamiclink="", # Extra dynamic libraries + flags="", # Custom compiler flags + coverage=False, # Add coverage instrumentation + keep_original=True, # Keep .futag-fuzz-drivers temp directory +) +``` + +### FuzzDataProviderGenerator + +Uses libFuzzer's `FuzzedDataProvider` API. C++ only, type-safe data consumption. + +```python +from futag.fdp_generator import FuzzDataProviderGenerator + +generator = FuzzDataProviderGenerator( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", +) +generator.gen_targets(anonymous=False, max_wrappers=100) +generator.compile_targets(workers=4, keep_failed=True) +``` + +### BlobStamperGenerator + +Uses LibBlobStamper. Inherits FDP's type generation but supports both C and C++. 
+ +```python +from futag.blob_stamper_generator import BlobStamperGenerator + +generator = BlobStamperGenerator( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", +) +``` + +### ContextGenerator + +Generates fuzz targets from consumer program usage contexts. + +```python +from futag.generator import ContextGenerator + +ctx_gen = ContextGenerator( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", + db_json_file=".futag-analysis/futag-analysis-result.json", + context_json_file=".futag-consumer/futag-contexts.json", + output_path="futag-context-fuzz-drivers", +) + +ctx_gen.gen_context(max_wrappers=10) # Note: gen_context(), not gen_targets() +ctx_gen.compile_targets(keep_failed=True) +``` + +### NatchGenerator + +Generates fuzz targets from Natch crash trace data. + +```python +from futag.generator import NatchGenerator + +natch_gen = NatchGenerator( + futag_llvm_package="/path/to/futag-llvm", + library_root="/path/to/library", + json_file="/path/to/natch-output.json", # Required: Natch JSON file +) + +natch_gen.parse_values() # Parse Natch JSON and create seed corpus +natch_gen.gen_targets() +natch_gen.compile_targets(workers=4) +``` + +### Creating a Custom Generator + +Subclass `BaseGenerator` and implement 10 abstract methods: + +```python +from futag.base_generator import BaseGenerator + +class MyGenerator(BaseGenerator): + @property + def default_headers(self) -> list: + return ["stdio.h", "my_custom_header.h"] + + @property + def supports_c(self) -> bool: + return True + + @property + def needs_buffer_check(self) -> bool: + return True + + @property + def harness_preamble(self) -> str: + return "" + + def _gen_builtin(self, param_name, gen_type_info) -> dict: + return { + "gen_lines": [...], # C/C++ code to declare and initialize + "gen_free": [...], # Cleanup code + "buffer_size": [...], # Size expressions + } + + # Implement: _gen_strsize, _gen_cstring, _gen_wstring, _gen_cxxstring, + # _gen_enum, 
_gen_array, _gen_void, _gen_qualifier, _gen_pointer +``` + +--- + +## Fuzzer Module + +### Fuzzer + +Executes generated fuzz targets and collects crashes. + +```python +from futag.fuzzer import Fuzzer + +fuzzer = Fuzzer( + futag_llvm_package="/path/to/futag-llvm", + fuzz_driver_path="futag-fuzz-drivers", # Directory with compiled fuzz targets + debug=False, # Print debug info + gdb=False, # Debug crashes with GDB + svres=False, # Generate svres XML for Svace + fork=1, # LibFuzzer fork mode (1=no fork) + totaltime=300, # Total fuzzing time per target (seconds) + timeout=10, # Per-test timeout (seconds) + memlimit=2048, # RSS memory limit (MB, 0=disabled) + coverage=False, # Generate coverage reports + leak=False, # Detect memory leaks + source_path="", # Source path for coverage HTML +) + +fuzzer.fuzz(extra_param="") # Run fuzzing on all targets +``` + +### NatchFuzzer + +Same as Fuzzer but adds Natch corpus path support. + +```python +from futag.fuzzer import NatchFuzzer + +fuzzer = NatchFuzzer( + futag_llvm_package="/path/to/futag-llvm", + fuzz_driver_path="futag-fuzz-drivers", + totaltime=60, + debug=True, +) +fuzzer.fuzz() +``` + +--- + +## Constants Reference (sysmsg.py) + +### Generation Type Constants (GEN_*) + +| Constant | Value | C/C++ Type | +|----------|-------|------------| +| `GEN_BUILTIN` | 0 | int, float, double, etc. 
| +| `GEN_CSTRING` | 1 | char *, const char * | +| `GEN_WSTRING` | 2 | wchar_t * | +| `GEN_CXXSTRING` | 3 | std::string | +| `GEN_ENUM` | 4 | enum types | +| `GEN_ARRAY` | 5 | fixed-size arrays | +| `GEN_VOID` | 6 | void * | +| `GEN_QUALIFIER` | 7 | const/volatile wrapper | +| `GEN_POINTER` | 8 | pointer types | +| `GEN_STRUCT` | 9 | struct types | +| `GEN_UNION` | 10 | union types | +| `GEN_CLASS` | 11 | C++ class types | +| `GEN_INCOMPLETE` | 12 | incomplete types | +| `GEN_FUNCTION` | 13 | function pointers | +| `GEN_INPUT_FILE` | 14 | file path (input) | +| `GEN_OUTPUT_FILE` | 15 | file path (output) | +| `GEN_UNKNOWN` | 18 | unknown types | + +### Function Type Constants + +| Constant | Value | Meaning | +|----------|-------|---------| +| `FUNC_CXXMETHOD` | 0 | C++ class method | +| `FUNC_CONSTRUCTOR` | 1 | C++ constructor | +| `FUNC_DEFAULT_CONSTRUCTOR` | 2 | C++ default constructor | +| `FUNC_DESTRUCTOR` | 3 | C++ destructor | +| `FUNC_GLOBAL` | 4 | Global C function | +| `FUNC_STATIC` | 5 | Static function | + +### Access Type Constants + +| Constant | Value | Meaning | +|----------|-------|---------| +| `AS_PUBLIC` | 0 | Public access | +| `AS_PROTECTED` | 1 | Protected access | +| `AS_PRIVATE` | 2 | Private access | +| `AS_NONE` | 3 | No specifier (C functions) | + +### Fuzz Driver Format + +| Constant | Value | Format | +|----------|-------|--------| +| `LIBFUZZER` | 0 | LibFuzzer harness | +| `AFLPLUSPLUS` | 1 | AFL++ harness | diff --git a/product-tests/README.md b/product-tests/README.md index f77493b7..160016a7 100644 --- a/product-tests/README.md +++ b/product-tests/README.md @@ -1,8 +1,40 @@ -# Futag product test +# Futag Product Tests -This directory contains: -- test of building from source code - "build-test"; -- test of latest compiled package - "package-test"; -- test Futag for libraries in [the testing repository](https://github.com/thientc/Futag-tests) - "libraries-test". 
+Dockerized test suites for validating Futag across multiple platforms. -The script "prepare-package.sh" compress compiled folder futag-llvm and copy to package-test directory. \ No newline at end of file +## Test Categories + +### build-test/ +Validates building the LLVM toolchain with Futag checkers from source. + +| Platform | Dockerfile | LLVM Version | +|----------|-----------|--------------| +| Ubuntu 20.04 | `ubuntu20/ubuntu20.Dockerfile` | LLVM 14 | +| Ubuntu 22.04 | `ubuntu22/ubuntu22.Dockerfile` | LLVM 18 | +| Ubuntu 24.04 | `ubuntu24/ubuntu24.Dockerfile` | LLVM 19 | +| Alt Linux 11 | `alt11/alt11.Dockerfile` | LLVM 18 | +| Alt Linux 12 | `alt12/alt12.Dockerfile` | LLVM 19 | + +### libraries-test/ +End-to-end tests against real open-source libraries (json-c, php, FreeImage, etc.). + +### package-test/ +Tests the pre-built Python package installation and basic functionality. + +## Running Tests + +Each test directory contains: +- `Dockerfile` — Container build definition +- `build.sh` — Build the Docker image +- `run.sh` — Run the test container + +Example: +```bash +cd build-test/ubuntu24 +./build.sh +./run.sh +``` + +## CI Integration + +See [.github/workflows/](../.github/workflows/) for automated CI workflows. diff --git a/product-tests/libraries-test/alt11/README.md b/product-tests/libraries-test/alt11/README.md index ab0b1b16..41fc1194 100644 --- a/product-tests/libraries-test/alt11/README.md +++ b/product-tests/libraries-test/alt11/README.md @@ -1,8 +1,8 @@ # Тестирование инструмента Futag Futag обновился до LLVM 18.8. 
-По ссылке https://github.com/ispras/Futag/releases/tag/v3.0.0 можно скачать готовые пакеты инструмента: -* futag-3.0.0.tar.gz - Python-пакет +По ссылке https://github.com/ispras/Futag/releases/tag/v3.0.1 можно скачать готовые пакеты инструмента: +* futag-3.0.1.tar.gz - Python-пакет * futag-llvm18.alt11.tar.xz - Собранный Futag для Альт-Линукса (версия 11) * futag-llvm.latest.tar.xz- Собранный Futag для Убунту-Линукса diff --git a/src/VERSION b/src/VERSION index 56fea8a0..cb2b00e4 100644 --- a/src/VERSION +++ b/src/VERSION @@ -1 +1 @@ -3.0.0 \ No newline at end of file +3.0.1 diff --git a/src/python/futag-package/README.md b/src/python/futag-package/README.md index 9d15f246..5174d9f9 100644 --- a/src/python/futag-package/README.md +++ b/src/python/futag-package/README.md @@ -20,7 +20,7 @@ This python package is for building library, generating and compiling fuzz-drive ## 1. Install ```bash -pip install dist/futag-2.1.1.tar.gz +pip install dist/futag-3.0.1.tar.gz ``` ## 2. Preprocessor diff --git a/src/python/futag-package/setup.cfg b/src/python/futag-package/setup.cfg index 8a36fd1a..24d7c963 100644 --- a/src/python/futag-package/setup.cfg +++ b/src/python/futag-package/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = futag -version = 3.0.0 +version = 3.0.1 author = Futag-team of ISP RAS author_email = thientcgithub@gmail.com description = Python package of Futag diff --git a/src/python/futag-package/setup.py b/src/python/futag-package/setup.py index 43be336a..6bac8c56 100644 --- a/src/python/futag-package/setup.py +++ b/src/python/futag-package/setup.py @@ -2,15 +2,34 @@ setup( name='futag', - version='3.0.0', + version='3.0.1', author='Futag-team of ISP RAS', author_email='thientcgithub@gmail.com', packages=['futag'], + package_dir={'futag': 'src/futag'}, scripts=[], - url='https://github.com/ispras/Futag/tree/main/src/python/futag-package', + url='https://github.com/ispras/Futag', + project_urls={ + 'Documentation': 'https://github.com/ispras/Futag/tree/main/docs', + 
'Source': 'https://github.com/ispras/Futag/tree/main/src/python/futag-package', + 'Bug Tracker': 'https://github.com/ispras/Futag/issues', + }, license='LICENSE', - description='Futag tools for creating fuzz targets of software library', + description='Futag - Fuzz target Automated Generator for software libraries', long_description=open('README.md').read(), + long_description_content_type='text/markdown', + classifiers=[ + 'Development Status :: 4 - Beta', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Testing', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + ], + python_requires='>=3.8', install_requires=[ "pathlib", "argparse", diff --git a/src/python/futag-package/src/futag/__init__.py b/src/python/futag-package/src/futag/__init__.py index e69de29b..17ee786d 100644 --- a/src/python/futag-package/src/futag/__init__.py +++ b/src/python/futag-package/src/futag/__init__.py @@ -0,0 +1,40 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + +"""Futag - Fuzz target Automated Generator. + +A tool from ISP RAS for automated generation of fuzzing wrappers +(fuzz targets) for software libraries. Analyzes library source code +via custom Clang/LLVM static analysis checkers and generates fuzz +targets in LibFuzzer or AFLplusplus format. 
+ +Typical usage:: + + from futag.preprocessor import Builder + from futag.generator import Generator + + builder = Builder(futag_llvm_path, library_root, clean=True) + builder.auto_build() + builder.analyze() + + generator = Generator(futag_llvm_path, library_root) + generator.gen_targets() + generator.compile_targets(workers=4) + +Classes: + Builder: Builds and analyzes target libraries. + ConsumerBuilder: Analyzes consumer programs using the library. + Generator: Generates fuzz targets using raw memcpy buffer consumption. + FuzzDataProviderGenerator: Generates fuzz targets using FuzzedDataProvider API. + BlobStamperGenerator: Generates fuzz targets using LibBlobStamper. + ContextGenerator: Generates context-aware fuzz targets from consumer usage. + NatchGenerator: Generates fuzz targets from Natch crash traces. + Fuzzer: Executes generated fuzz targets and collects crashes. + NatchFuzzer: Executes Natch-generated fuzz targets with corpus support. +""" + +__version__ = "3.0.1" + +import logging +logging.getLogger('futag').addHandler(logging.NullHandler()) diff --git a/src/python/futag-package/src/futag/base_generator.py b/src/python/futag-package/src/futag/base_generator.py index 06b78c99..8fb9662c 100644 --- a/src/python/futag-package/src/futag/base_generator.py +++ b/src/python/futag-package/src/futag/base_generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+ # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -14,6 +18,7 @@ # ************************************************** import json +import logging import pathlib import copy import os @@ -22,9 +27,11 @@ from subprocess import Popen, PIPE from multiprocessing import Pool from typing import List -from distutils.dir_util import copy_tree +from shutil import copytree from futag.sysmsg import * + +logger = logging.getLogger(__name__) from futag.preprocessor import delete_folder from futag.generator_state import GeneratorState @@ -66,53 +73,327 @@ def harness_preamble(self): # ------------------------------------------------------------------ # @abstractmethod - def _gen_builtin(self, param_name, gen_type_info) -> dict: - """Declare and assign value for a builtin type.""" + def _gen_builtin(self, param_name: str, gen_type_info: dict) -> dict: + """Generate initialization code for a C/C++ builtin type variable. + + Subclasses must produce code that declares a variable of the given + builtin type and assigns it a value derived from the fuzz input. + + Args: + param_name: The generated variable name (e.g. "b_param0"). + gen_type_info: Type metadata dict from analysis JSON containing + at least ``type_name`` (e.g. "int", "double") and + ``gen_type`` (always ``GEN_BUILTIN``). + + Returns: + dict with three keys: + - ``gen_lines`` (list[str]): C/C++ code lines that declare and + initialize the variable. + - ``gen_free`` (list[str]): Cleanup code lines (typically empty + for builtins). + - ``buffer_size`` (list[str]): Size expressions consumed from + the fuzz buffer. 
+ + Example (Generator / memcpy backend): + ``{"gen_lines": ["int b_x;\\n", + "memcpy(&b_x, futag_pos, sizeof(int));\\n", + "futag_pos += sizeof(int);\\n"], + "gen_free": [], + "buffer_size": ["sizeof(int)"]}`` + + Example (FDP backend): + ``{"gen_lines": ["auto b_x = provider.ConsumeIntegral();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name) -> dict: - """Generate a string-size parameter.""" + def _gen_strsize(self, param_name: str, param_type: str, dyn_size_idx: int, array_name: str) -> dict: + """Generate code for a parameter that holds the size of a preceding string. + + When a function parameter immediately follows a string parameter and + has a size-compatible type, this method is called instead of + ``_gen_builtin`` to tie the size variable to the dynamic string length. + + Args: + param_name: The generated variable name (e.g. "sz_len"). + param_type: The C type name of the size parameter (e.g. "size_t"). + dyn_size_idx: Index into the dynamic string size array + (``dyn_cstring_size``, ``dyn_wstring_size``, or + ``dyn_cxxstring_size``). + array_name: Name of the size array to index into, one of + ``"dyn_cstring_size"``, ``"dyn_wstring_size"``, or + ``"dyn_cxxstring_size"``. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["size_t sz_len = dyn_cstring_size[0];\\n"], + "gen_free": [], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["size_t sz_len = dyn_cstring_size[0];\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... 
@abstractmethod - def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx) -> dict: - """Declare and assign value for a C string type.""" + def _gen_cstring(self, param_name: str, gen_type_info: dict, dyn_cstring_size_idx: int) -> dict: + """Generate initialization code for a C string (``char *``) variable. + + Produces code that allocates a buffer, copies fuzz data into it, and + null-terminates the result. + + Args: + param_name: The generated variable name (e.g. "str_buf0"). + gen_type_info: Type metadata dict with at least ``type_name`` + and ``gen_type`` (``GEN_CSTRING``). + dyn_cstring_size_idx: Current index into the ``dyn_cstring_size`` + array, used to determine the dynamic length of this string. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). Typically includes a + ``malloc``/``free`` pair in Generator, or + ``ConsumeRandomLengthString`` in FDP. + + Example (Generator / memcpy backend): + ``{"gen_lines": ["char * str_buf0 = ...", + "memcpy(str_buf0, futag_pos, ...);\\n"], + "gen_free": ["if (str_buf0) free(str_buf0);\\n"], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["auto str_buf0 = provider.ConsumeRandomLengthString();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx) -> dict: - """Declare and assign value for a wide string type.""" + def _gen_wstring(self, param_name: str, gen_type_info: dict, dyn_wstring_size_idx: int) -> dict: + """Generate initialization code for a wide string (``wchar_t *``) variable. + + Similar to ``_gen_cstring`` but for wide-character strings. Produces + code that allocates a ``wchar_t`` buffer, copies fuzz data, and + null-terminates. + + Args: + param_name: The generated variable name (e.g. "str_wbuf0"). + gen_type_info: Type metadata dict with ``type_name`` and + ``gen_type`` (``GEN_WSTRING``). 
+ dyn_wstring_size_idx: Current index into the + ``dyn_wstring_size`` array. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["wchar_t * str_wbuf0 = ...", + "memcpy(str_wbuf0, futag_pos, ...);\\n"], + "gen_free": ["if (str_wbuf0) free(str_wbuf0);\\n"], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["auto str_wbuf0 = provider.ConsumeRandomLengthString();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx) -> dict: - """Declare and assign value for a C++ string type.""" + def _gen_cxxstring(self, param_name: str, gen_type_info: dict, dyn_cxxstring_size_idx: int) -> dict: + """Generate initialization code for a C++ ``std::string`` variable. + + Produces code that creates an ``std::string`` from fuzz input data. + + Args: + param_name: The generated variable name (e.g. "strcxx_s0"). + gen_type_info: Type metadata dict with ``type_name`` and + ``gen_type`` (``GEN_CXXSTRING``). + dyn_cxxstring_size_idx: Current index into the + ``dyn_cxxstring_size`` array. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["char * strcxx_s0_buffer = ...", + "std::string strcxx_s0(strcxx_s0_buffer);\\n"], + "gen_free": ["free(strcxx_s0_buffer);\\n"], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["auto strcxx_s0 = provider.ConsumeRandomLengthString();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... 
@abstractmethod - def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anonymous=False) -> dict: - """Declare and assign value for an enum type.""" + def _gen_enum(self, enum_record: dict, param_name: str, gen_type_info: dict, compiler_info: dict, anonymous: bool = False) -> dict: + """Generate initialization code for an enum type variable. + + Produces code that selects one of the enum's valid values from + the fuzz input. + + Args: + enum_record: The enum definition dict from analysis JSON, + containing ``enum_values`` (list of valid values) and + ``qname``. + param_name: The generated variable name (e.g. "e_mode"). + gen_type_info: Type metadata dict with ``type_name`` and + ``gen_type`` (``GEN_ENUM``). + compiler_info: Compilation context dict (with ``compiler``, + ``command``, ``file``, ``location`` keys) used to + determine C vs C++ treatment. + anonymous: If True, strip anonymous namespace qualifiers + from generated code. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["int e_mode_value;\\n", + "memcpy(&e_mode_value, futag_pos, sizeof(int));\\n", + "MyEnum e_mode = static_cast(values[...]);\\n"], + "gen_free": [], + "buffer_size": ["sizeof(int)"]}`` + + Example (FDP backend): + ``{"gen_lines": ["auto e_mode = provider.ConsumeIntegral();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_array(self, param_name, gen_type_info) -> dict: - """Declare and assign value for an array type.""" + def _gen_array(self, param_name: str, gen_type_info: dict) -> dict: + """Generate initialization code for a fixed-size array variable. + + Produces code that declares an array and fills it with fuzz data. + + Args: + param_name: The generated variable name (e.g. "a_buf"). + gen_type_info: Type metadata dict with ``type_name``, + ``length`` (array size), and ``gen_type`` (``GEN_ARRAY``). 
+ + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["int a_buf[10];\\n", + "memcpy(a_buf, futag_pos, 10 * sizeof(int));\\n", + "futag_pos += 10 * sizeof(int);\\n"], + "gen_free": [], + "buffer_size": ["10 * sizeof(int)"]}`` + + Example (FDP backend): + ``{"gen_lines": ["int a_buf[10];\\n", + "for (int i=0; i<10; i++) a_buf[i] = provider.ConsumeIntegral();\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_void(self, param_name) -> dict: - """Declare and assign value for a void type.""" + def _gen_void(self, param_name: str) -> dict: + """Generate initialization code for a ``void *`` parameter. + + Void pointers are generally difficult to fuzz meaningfully. + Implementations may produce a raw buffer cast or mark the + parameter as non-generable. + + Args: + param_name: The generated variable name (e.g. "a_data"). + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["void * a_data = futag_pos;\\n"], + "gen_free": [], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["// void type not generated\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_qualifier(self, param_name, prev_param_name, gen_type_info) -> dict: - """Declare and assign value for a qualified type.""" + def _gen_qualifier(self, param_name: str, prev_param_name: str, gen_type_info: dict) -> dict: + """Generate initialization code for a type-qualified (``const``/``volatile``) variable. + + Wraps a previously generated variable with the appropriate + qualifier, typically by casting or assigning. + + Args: + param_name: The generated variable name for this qualifier + layer (e.g. "q_param0"). 
+ prev_param_name: The variable name from the previous + generation layer that this qualifier wraps. + gen_type_info: Type metadata dict with ``type_name`` + (the fully qualified type) and ``gen_type`` + (``GEN_QUALIFIER``). + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["const int q_param0 = b_param0;\\n"], + "gen_free": [], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["const int q_param0 = b_param0;\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... @abstractmethod - def _gen_pointer(self, param_name, prev_param_name, gen_type_info) -> dict: - """Declare and assign value for a pointer type.""" + def _gen_pointer(self, param_name: str, prev_param_name: str, gen_type_info: dict) -> dict: + """Generate initialization code for a pointer type variable. + + Wraps a previously generated variable by taking its address or + allocating a pointer to hold the value. + + Args: + param_name: The generated variable name for this pointer + layer (e.g. "p_param0"). + prev_param_name: The variable name from the previous + generation layer whose address is taken. + gen_type_info: Type metadata dict with ``type_name`` + (the pointer type, e.g. "int *") and ``gen_type`` + (``GEN_POINTER``). + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + (same structure as ``_gen_builtin``). + + Example (Generator / memcpy backend): + ``{"gen_lines": ["int * p_param0 = &b_param0;\\n"], + "gen_free": [], + "buffer_size": []}`` + + Example (FDP backend): + ``{"gen_lines": ["int * p_param0 = &b_param0;\\n"], + "gen_free": [], + "buffer_size": []}`` + """ ... 
# ------------------------------------------------------------------ # @@ -188,8 +469,7 @@ def __init__(self, futag_llvm_package: str, library_root: str, target_type: int self.json_file = pathlib.Path(json_file) if self.json_file.exists(): - f = open(self.json_file.as_posix()) - if not f.closed: + with open(self.json_file.as_posix()) as f: self.target_library = json.load(f) tmp_output_path = "." + output_path # create directory for function targets if not exists @@ -265,7 +545,8 @@ def _get_compile_command(self, file): """ if (self.build_path / "compile_commands.json").exists(): compile_commands = self.build_path / "compile_commands.json" - commands = json.load(open(compile_commands.as_posix())) + with open(compile_commands.as_posix()) as f: + commands = json.load(f) for command in commands: if pathlib.Path(command["file"]) == pathlib.Path(file): extension = command["file"].split(".")[-1] @@ -378,7 +659,13 @@ def _get_function_header(self, func_location): break return included_headers - def _add_header(self, function_headers): + def _add_header(self, function_headers: list) -> None: + """Add headers to the current state if not already present. + + Args: + function_headers: List of header include strings + (e.g. ``['"mylib.h"', '']``). + """ for h in function_headers: if h not in self.state.header: self.state.header.append(h) @@ -436,7 +723,17 @@ def _search_return_types(self, param_gen_list, curr_function, function_lists): }) return result - def _append_gen_dict(self, curr_gen): + def _append_gen_dict(self, curr_gen: dict) -> None: + """Append generation results to the current state. + + Merges the code lines, cleanup lines, and buffer size expressions + from a single type-generation result into the accumulating state. + + Args: + curr_gen: A generation result dict with keys ``gen_lines``, + ``gen_free``, and ``buffer_size``. May be empty or None, + in which case this is a no-op. 
+ """ if curr_gen: self.state.buffer_size += curr_gen["buffer_size"] self.state.gen_lines += curr_gen["gen_lines"] @@ -446,7 +743,26 @@ def _append_gen_dict(self, curr_gen): # Complex shared generation methods # # ------------------------------------------------------------------ # - def _gen_struct(self, struct_name, struct, gen_info): + def _gen_struct(self, struct_name: str, struct: dict, gen_info: dict) -> dict: + """Generate initialization code for a struct type by iterating over fields. + + Declares a struct variable and recursively generates initialization + code for each of its fields using the appropriate type-specific + generation method. + + Args: + struct_name: The generated variable name for the struct + (e.g. "s_config"). + struct: The struct record dict from analysis JSON, containing + a ``fields`` list where each field has ``field_name`` and + ``gen_list``. + gen_info: Type metadata dict for the struct parameter with at + least ``type_name`` (the struct's C type name). + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + aggregated from all field initializations. + """ gen_lines = [gen_info["type_name"] + " " + struct_name + ";\n"] gen_free = [] buffer_size = [] @@ -704,8 +1020,26 @@ def _gen_file_descriptor(self, param_name, gen_type_info): "buffer_size": [] } - def _gen_var_function(self, func_param_name: str, func): - """ Initialize for argument of function call """ + def _gen_var_function(self, func_param_name: str, func: dict) -> dict: + """Generate code to initialize a variable via a function call. + + When a parameter's type matches the return type of another library + function, this method generates code that calls that function to + produce the needed value. It recursively generates all arguments + required by the called function. + + Args: + func_param_name: The variable name to assign the function's + return value to (e.g. "s_config"). 
+ func: The function dict from analysis JSON for the function + to call, containing ``params``, ``qname``, + ``return_type``, ``func_type``, etc. + + Returns: + dict with keys ``gen_lines``, ``gen_free``, ``buffer_size`` + aggregated from the function's argument generation and the + call itself. + """ # curr_dyn_size = 0 param_list = [] curr_gen_string = -1 @@ -955,8 +1289,24 @@ def _gen_var_function(self, func_param_name: str, func): # File management methods # # ------------------------------------------------------------------ # - def _wrapper_file(self, func): + def _wrapper_file(self, func: dict) -> dict: + """Create a wrapper source file for a fuzz target. + + Creates the directory structure and opens a new source file for + writing the generated fuzz driver. Each function may have up to + ``self.max_wrappers`` variants, stored in numbered subdirectories. + + Args: + func: The function dict from analysis JSON, used to derive + the file name from ``qname`` and the file extension from + ``location.fullpath``. + Returns: + dict with two keys: + - ``file``: An open file handle for writing, or None on + error. + - ``msg``: A status message string describing the result. + """ # if anonymous: # filename = func["name"] # filepath = self.tmp_output_path / "anonymous" @@ -1009,8 +1359,22 @@ def _wrapper_file(self, func): "msg": "Successed: " + full_path + " created!" } - def _anonymous_wrapper_file(self, func): + def _anonymous_wrapper_file(self, func: dict): + """Create a wrapper file for anonymous namespace functions. + + Similar to ``_wrapper_file`` but copies the original source file + content into the wrapper first (so that the anonymous-namespace + function is accessible), then opens the file in append mode for + adding the harness code. + + Args: + func: The function dict from analysis JSON, containing + ``name``, ``hash``, and ``location.fullpath``. 
+ Returns: + An open file handle in append mode, or None if the file + could not be created or the wrapper limit was exceeded. + """ # if anonymous: # filename = func["name"] # filepath = self.tmp_output_path / "anonymous" @@ -1050,16 +1414,29 @@ def _anonymous_wrapper_file(self, func): filepath / filename / dir_name / file_name).as_posix() with open(source_path, 'r') as s: source_file = s.read() - d = open(full_path_destination, "w") + with open(full_path_destination, "w") as d: d.write("//"+func["hash"] + "\n") d.write(source_file) - d.close() f = open(full_path_destination, 'a') if f.closed: return None return f - def _log_file(self, func, anonymous: bool = False): + def _log_file(self, func: dict, anonymous: bool = False): + """Create a log file for recording generation decisions. + + Opens a ``.log`` file alongside the generated fuzz driver to + capture why generation succeeded or failed for a given function. + + Args: + func: The function dict from analysis JSON. + anonymous: If True, use the short ``name`` and place the + log under an ``anonymous`` subdirectory. + + Returns: + An open file handle for writing, or None if the file could + not be created or the wrapper limit was exceeded. 
+ """ if anonymous: filename = func["name"] filepath = self.tmp_output_path / "anonymous" @@ -1071,7 +1448,7 @@ def _log_file(self, func, anonymous: bool = False): # qname = func["qname"] if len(filename) > 250: - print("Error: File name is too long (>250 characters)!") + logger.error("File name is too long (>250 characters)!") return None dir_name = filename + str(file_index) @@ -1088,7 +1465,7 @@ def _log_file(self, func, anonymous: bool = False): break if file_index > self.max_wrappers: - print("Warning: exeeded maximum number of generated fuzzing-wrappers for each function!") + logger.warning("exeeded maximum number of generated fuzzing-wrappers for each function!") return None (filepath / filename / dir_name).mkdir(parents=True, exist_ok=True) @@ -1097,7 +1474,7 @@ def _log_file(self, func, anonymous: bool = False): full_path = (filepath / filename / dir_name / file_name).as_posix() f = open(full_path, 'w') if f.closed: - print("crreate file error: ", full_path) + logger.error("crreate file error: %s", full_path) return None return f @@ -1105,12 +1482,28 @@ def _log_file(self, func, anonymous: bool = False): # State save/restore # # ------------------------------------------------------------------ # - def _save_state(self): - """Save the current generation state for later restoration.""" + def _save_state(self) -> "GeneratorState": + """Save current generator state for backtracking. + + Creates a deep copy of ``self.state`` so that the generator can + try multiple generation strategies (e.g. different functions that + return the needed struct type) and revert if one fails. + + Returns: + A deep-copied ``GeneratorState`` snapshot. + """ return self.state.save() - def _restore_state(self, saved_state): - """Restore generation state from a previously saved copy.""" + def _restore_state(self, saved_state) -> None: + """Restore generator state from a saved copy. 
+ + Reverts ``self.state`` to a snapshot previously obtained via + ``_save_state``, discarding all mutations made since then. + + Args: + saved_state: A ``GeneratorState`` snapshot from + ``_save_state()``. + """ self.state.restore_from(saved_state) # ------------------------------------------------------------------ # @@ -1118,7 +1511,34 @@ def _restore_state(self, saved_state): # __gen_anonymous_function # # ------------------------------------------------------------------ # - def _gen_target_function(self, func, param_id, anonymous=False) -> bool: + def _gen_target_function(self, func: dict, param_id: int, anonymous: bool = False) -> bool: + """Recursively generate a complete fuzz target for a function. + + Processes function parameters one at a time (indexed by + ``param_id``). For each parameter, dispatches to the appropriate + type-specific generation method. When all parameters have been + processed (base case: ``param_id == len(func['params'])``), + writes the complete harness file including headers, buffer size + checks, generated code lines, the function call, and cleanup. + + For struct and class parameters, this method may try multiple + generation strategies (via different return-type-matching + functions), saving and restoring state between attempts. + + Args: + func: The function dict from analysis JSON containing + ``params``, ``qname``, ``return_type``, ``func_type``, + ``location``, etc. + param_id: Zero-based index of the parameter currently being + processed. Incremented on each recursive call. + anonymous: If True, generate an anonymous-namespace-aware + wrapper that includes the original source file. + + Returns: + True if the harness file was successfully written, False + if generation failed (e.g. unsupported type, missing + constructor, exceeded wrapper limit). 
+ """ malloc_free = [ "unsigned char *", "char *", @@ -1144,7 +1564,7 @@ def _gen_target_function(self, func, param_id, anonymous=False) -> bool: if (not len(self.state.buffer_size) and not self.state.dyn_cstring_size_idx and not self.state.dyn_cxxstring_size_idx and not self.state.dyn_wstring_size_idx and not self.state.file_idx) or not self.state.gen_this_function: log = self._log_file(func, self.gen_anonymous) if not log: - print(CANNOT_CREATE_LOG_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_LOG_FILE} {func['qname']}") if not anonymous: return False else: @@ -1158,9 +1578,9 @@ def _gen_target_function(self, func, param_id, anonymous=False) -> bool: f = self._anonymous_wrapper_file(func) if not f: self.state.gen_this_function = False - print(CANNOT_CREATE_WRAPPER_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_WRAPPER_FILE} {func['qname']}") return False - print(WRAPPER_FILE_CREATED, f.name) + logger.info(f"{WRAPPER_FILE_CREATED} {f.name}") for line in self._gen_header(func["location"]["fullpath"]): f.write("// " + line) @@ -1168,9 +1588,8 @@ def _gen_target_function(self, func, param_id, anonymous=False) -> bool: else: # generate file name (normal path) wrapper_result = self._wrapper_file(func) - print("Generating fuzzing-wapper for function ", - func["qname"], ": ") - print("-- ", wrapper_result["msg"]) + logger.info("Generating fuzzing-wapper for function %s:", func["qname"]) + logger.info("-- %s", wrapper_result["msg"]) if not wrapper_result["file"]: self.state.gen_this_function = False return False @@ -1506,7 +1925,7 @@ def _gen_target_function(self, func, param_id, anonymous=False) -> bool: if gen_type_info["gen_type"] == GEN_REFSTRING: if not anonymous: - print("!!!GEN_REFSTRING\n\n\n") + logger.debug("!!!GEN_REFSTRING") # GEN FILE NAME OR # GEN STRING if (curr_param["param_usage"] in ["FILE_PATH_READ", "FILE_PATH_WRITE", "FILE_PATH_RW", "FILE_PATH"] or curr_param["param_name"] in ["filename", "file", "filepath"] or 
curr_param["param_name"].find('file') != -1 or curr_param["param_name"].find('File') != -1) and len(curr_param["gen_list"]) == 1: curr_name = "f_" + curr_name # string_prefix @@ -1757,12 +2176,23 @@ def _gen_target_function(self, func, param_id, anonymous=False) -> bool: # gen_targets # # ------------------------------------------------------------------ # - def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers: int = 10, max_functions: int = 10000): - """ - Parameters - ---------- - anonymous: bool - option for generating fuzz-targets of non-public functions, default to False. + def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers: int = 10, max_functions: int = 10000) -> None: + """Generate fuzz targets for all eligible functions in the library. + + Iterates over the analyzed function list, filters by access type + and storage class, and calls ``_gen_target_function`` for each + eligible function. Results are saved under ``self.tmp_output_path``. + + Args: + anonymous: If True, also generate fuzz targets for functions + in anonymous namespaces. Defaults to False. + from_list: Path to a JSON file containing a list of function + names to restrict generation to. If empty, all eligible + functions are processed. Defaults to "". + max_wrappers: Maximum number of fuzz-driver variants to + generate per function. Defaults to 10. + max_functions: Maximum total number of functions to process. + Defaults to 10000. 
""" # Load the list of functions from the provided JSON file if specified if from_list: @@ -1770,7 +2200,7 @@ def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers with open(from_list, 'r') as f: function_list = json.load(f) except Exception as e: - print(f"Error loading function list from {from_list}: {e}") + logger.error(f"Error loading function list from {from_list}: {e}") function_list = [] else: function_list = [] @@ -1792,8 +2222,7 @@ def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers if func_index > max_functions: break if func["access_type"] == AS_NONE and func["fuzz_it"] and func["storage_class"] < 2 and (func["parent_hash"] == ""): - print( - "-- [Futag] Try to generate fuzz-driver for function: ", func["name"], "...") + logger.info("Try to generate fuzz-driver for function: %s...", func["name"]) C_generated_function.append(func["name"]) self.state.reset() self.state.curr_function = func @@ -1805,8 +2234,7 @@ def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers # For C++, Declare object of class and then call the method if func["access_type"] == AS_PUBLIC and func["fuzz_it"] and func["func_type"] in [FUNC_CXXMETHOD, FUNC_CONSTRUCTOR, FUNC_DEFAULT_CONSTRUCTOR, FUNC_GLOBAL, FUNC_STATIC] and (not "::operator" in func["qname"]): Cplusplus_usual_class_method.append(func["qname"]) - print( - "-- [Futag] Try to generate fuzz-driver for class method: ", func["name"], "...") + logger.info("Try to generate fuzz-driver for class method: %s...", func["name"]) self.state.reset() self.state.curr_function = func if "(anonymous" in func["qname"]: @@ -1834,8 +2262,8 @@ def gen_targets(self, anonymous: bool = False, from_list: str = "", max_wrappers "Cplusplus_anonymous_class_methods": Cplusplus_anonymous_class_method, "C_unknown_functions": C_unknown_function } - json.dump(self.result_report, open( - (self.build_path / "result-report.json").as_posix(), "w")) + with open((self.build_path / 
"result-report.json").as_posix(), "w") as f: + json.dump(self.result_report, f) # ------------------------------------------------------------------ # # Compilation methods # @@ -1850,67 +2278,80 @@ def compile_driver_worker(self, bgen_args): universal_newlines=True, ) - target_file = open(bgen_args["source_path"], "a") - - target_file.write("\n// Compile database: \n") - target_file.write("/*\n") - target_file.write( - "command: " + bgen_args["compiler_info"]['command'] + "\n") - target_file.write("location: " + - bgen_args["compiler_info"]['location'] + "\n") - target_file.write("file: " + bgen_args["compiler_info"]['file']) - target_file.write("\n*/\n") - - new_compiler_cmd = [] - compiler_cmd = bgen_args["compiler_cmd"] - target_file.write("\n// Compile command:") - target_file.write("\n/* \n") - output, errors = p.communicate() - if p.returncode: - print(" ".join(bgen_args["compiler_cmd"])) - print("\n-- [Futag] ERROR on target ", - bgen_args["target_name"], "\n") - for c in compiler_cmd: - if c.find(self.tmp_output_path.as_posix()) >= 0: - new_compiler_cmd.append( - c.replace(self.tmp_output_path.as_posix(), self.failed_path.as_posix())) - else: - new_compiler_cmd.append(c) - - else: - print("-- [Futag] Fuzz-driver ", - bgen_args["target_name"], " was compiled successfully!") - for c in compiler_cmd: - if c.find(self.tmp_output_path.as_posix()) >= 0: - new_compiler_cmd.append( - c.replace(self.tmp_output_path.as_posix(), self.succeeded_path.as_posix())) - else: - new_compiler_cmd.append(c) + with open(bgen_args["source_path"], "a") as target_file: + target_file.write("\n// Compile database: \n") + target_file.write("/*\n") + target_file.write( + "command: " + bgen_args["compiler_info"]['command'] + "\n") + target_file.write("location: " + + bgen_args["compiler_info"]['location'] + "\n") + target_file.write("file: " + bgen_args["compiler_info"]['file']) + target_file.write("\n*/\n") + + new_compiler_cmd = [] + compiler_cmd = bgen_args["compiler_cmd"] + 
target_file.write("\n// Compile command:") + target_file.write("\n/* \n") + output, errors = p.communicate() + if p.returncode: + logger.debug(" ".join(bgen_args["compiler_cmd"])) + logger.error("ERROR on target %s", bgen_args["target_name"]) + for c in compiler_cmd: + if c.find(self.tmp_output_path.as_posix()) >= 0: + new_compiler_cmd.append( + c.replace(self.tmp_output_path.as_posix(), self.failed_path.as_posix())) + else: + new_compiler_cmd.append(c) - target_file.write(" ".join(new_compiler_cmd)) - target_file.write("\n */\n") + else: + logger.info("Fuzz-driver %s was compiled successfully!", bgen_args["target_name"]) + for c in compiler_cmd: + if c.find(self.tmp_output_path.as_posix()) >= 0: + new_compiler_cmd.append( + c.replace(self.tmp_output_path.as_posix(), self.succeeded_path.as_posix())) + else: + new_compiler_cmd.append(c) - error_log_file = open(bgen_args["error_path"], "r") - if error_log_file: - target_file.write("\n// Error log:") - target_file.write("\n/* \n") - target_file.write("".join(error_log_file.readlines())) - error_log_file.close() + target_file.write(" ".join(new_compiler_cmd)) target_file.write("\n */\n") - target_file.close() - def compile_targets(self, workers: int = 4, keep_failed: bool = False, extra_params: str = "", extra_include: str = "", extra_dynamiclink: str = "", flags: str = "", coverage: bool = False, keep_original: bool = True): - """_summary_ + with open(bgen_args["error_path"], "r") as error_log_file: + target_file.write("\n// Error log:") + target_file.write("\n/* \n") + target_file.write("".join(error_log_file.readlines())) + target_file.write("\n */\n") + + def compile_targets(self, workers: int = 4, keep_failed: bool = False, extra_params: str = "", extra_include: str = "", extra_dynamiclink: str = "", flags: str = "", coverage: bool = False, keep_original: bool = True) -> None: + """Compile all generated fuzz targets using the configured toolchain. 
+ + Discovers generated source files under ``self.tmp_output_path``, + constructs compiler commands with appropriate flags and include + paths, and compiles them in parallel. Successfully compiled + targets are moved to ``self.succeeded_path``; optionally, failed + targets are preserved in ``self.failed_path``. Args: - workers (int, optional): number of processes for compiling. Defaults to 4. - keep_failed (bool, optional): option for saving not compiled fuzz-targets. Defaults to False. - extra_params (str, optional): option for adding parameters while compiling. Defaults to "". - extra_include (str, optional): option for adding included directories while compiling. Defaults to "". - extra_dynamiclink (str, optional): option for adding dynamic libraries while compiling. Defaults to "". - flags (str, optional): flags for compiling fuzz-drivers. Defaults to "-fsanitize=address -g -O0". - coverage (bool, optional): option for adding coverage flag. Defaults to False. - keep_original (bool, optional): option for keeping .futag-fuzz-drivers. Defaults to False. + workers: Number of parallel processes for compilation. + Defaults to 4. + keep_failed: If True, copy non-compiling fuzz targets to + the ``failed/`` directory for inspection. Defaults to + False. + extra_params: Additional compiler parameters (space-separated + string) appended to the compile command. Defaults to "". + extra_include: Additional include directories + (space-separated string). Each entry is prefixed with + ``-I``. Defaults to "". + extra_dynamiclink: Dynamic libraries to link against + (space-separated string, e.g. "-lssl -lcrypto"). When + provided, overrides static library linking. Defaults + to "". + flags: Override default compiler flags entirely. When empty, + defaults to sanitizer + debug flags appropriate for the + target type. Defaults to "". + coverage: If True, add coverage instrumentation flags + (``--coverage``). Defaults to False. 
+ keep_original: If True, retain the ``.futag-fuzz-drivers`` + temporary directory after compilation. Defaults to True. """ # include_subdir = self.target_library["header_dirs"] @@ -2058,7 +2499,7 @@ def compile_targets(self, workers: int = 4, keep_failed: bool = False, extra_par compiled_targets_list = [ x for x in self.tmp_output_path.glob("**/*.out") if x.is_file()] - print("-- [Futag] collecting result ...") + logger.info("collecting result ...") succeeded_tree = set() succeeded_dir = set() @@ -2074,8 +2515,8 @@ def compile_targets(self, workers: int = 4, keep_failed: bool = False, extra_par ((self.succeeded_path / dir.parents[1].name)).mkdir(parents=True, exist_ok=True) # shutil.move(dir.parents[0].as_posix(), (self.succeeded_path / dir.parents[1].name).as_posix(), copy_function=shutil.copytree) - copy_tree(dir.parents[0].as_posix( - ), (self.succeeded_path / dir.parents[1].name / dir.parents[0].name).as_posix()) + copytree(dir.parents[0].as_posix( + ), (self.succeeded_path / dir.parents[1].name / dir.parents[0].name).as_posix(), dirs_exist_ok=True) if keep_failed: failed_tree = set() @@ -2094,17 +2535,16 @@ def compile_targets(self, workers: int = 4, keep_failed: bool = False, extra_par ((self.failed_path / dir.parents[1].name)).mkdir(parents=True, exist_ok=True) # shutil.move(dir.parents[0].as_posix(), (self.failed_path / dir.parents[1].name).as_posix(), copy_function=shutil.copytree) - copy_tree(dir.parents[0].as_posix( - ), (self.failed_path / dir.parents[1].name / dir.parents[0].name).as_posix()) + copytree(dir.parents[0].as_posix( + ), (self.failed_path / dir.parents[1].name / dir.parents[0].name).as_posix(), dirs_exist_ok=True) else: delete_folder(self.failed_path) if not keep_original: delete_folder(self.tmp_output_path) - print( - "-- [Futag] Result of compiling: " - + str(len(compiled_targets_list)) - + " fuzz-driver(s)\n" + logger.info( + "Result of compiling: %s fuzz-driver(s)", + str(len(compiled_targets_list)) ) # 
------------------------------------------------------------------ # diff --git a/src/python/futag-package/src/futag/blob_stamper_generator.py b/src/python/futag-package/src/futag/blob_stamper_generator.py index 4a536706..18837f6a 100644 --- a/src/python/futag-package/src/futag/blob_stamper_generator.py +++ b/src/python/futag-package/src/futag/blob_stamper_generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -28,10 +32,11 @@ class BlobStamperGenerator(FuzzDataProviderGenerator): """ @property - def supports_c(self): + def supports_c(self) -> bool: + """Return whether this generator supports C targets.""" return True # BlobStamper supports both C and C++ - def _wrapper_file(self, func): - # Don't force .cpp extension like FDP does - use original extension + def _wrapper_file(self, func) -> dict: + """Return wrapper file metadata, using the original source file extension.""" self.target_extension = func["location"]["fullpath"].split(".")[-1] return BaseGenerator._wrapper_file(self, func) diff --git a/src/python/futag-package/src/futag/context_generator.py b/src/python/futag-package/src/futag/context_generator.py index 07ea95db..0fede332 100644 --- a/src/python/futag-package/src/futag/context_generator.py +++ b/src/python/futag-package/src/futag/context_generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+ # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -15,6 +19,7 @@ """Futag ContextGenerator - context-aware fuzz target generation.""" import json +import logging import pathlib import copy import sys @@ -22,6 +27,8 @@ from futag.sysmsg import * from futag.generator import Generator +logger = logging.getLogger(__name__) + class ContextGenerator(Generator): """Context-aware Futag Generator that uses consumer call contexts. @@ -71,8 +78,7 @@ def __init__(self, futag_llvm_package: str, library_root: str, self.context_json_file = pathlib.Path(context_json_file) if self.context_json_file.exists(): - f = open(self.context_json_file.as_posix()) - if not f.closed: + with open(self.context_json_file.as_posix()) as f: self.consumer_contexts = json.load(f) else: sys.exit(INVALID_CONTEXT_FILE_PATH + " " + @@ -552,8 +558,8 @@ def sort_callexprs(self): total_context = [] for context in self.consumer_contexts: - print("====== Context: ") - print("cfg_blocks: ", context["cfg_blocks"]) + logger.info("====== Context: ") + logger.info("cfg_blocks: %s", context["cfg_blocks"]) cfg_blocks = context["cfg_blocks"] init_calls = context["init_calls"] modifying_calls = context["modifying_calls"] @@ -624,7 +630,7 @@ def _gen_context_wrapper(self, func): if (not len(self.state.buffer_size) and not self.state.dyn_cstring_size_idx and not self.state.dyn_cxxstring_size_idx and not self.state.dyn_wstring_size_idx and not self.state.file_idx) or not self.state.gen_this_function: log = self._log_file(func, self.gen_anonymous) if not log: - print(CANNOT_CREATE_LOG_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_LOG_FILE} {func['qname']}") else: self.state.curr_func_log = f"Log for function: {func['qname']}\n{self.state.curr_func_log}" log.write(self.state.curr_func_log) @@ -632,8 +638,8 @@ def _gen_context_wrapper(self, func): return False # generate file name wrapper_result = self._wrapper_file(func) - 
print("Generating fuzzing-wapper for function ", func["qname"], ": ") - print("-- ", wrapper_result["msg"]) + logger.info("Generating fuzzing-wapper for function %s:", func["qname"]) + logger.info("-- %s", wrapper_result["msg"]) if not wrapper_result["file"]: self.state.gen_this_function = False return False diff --git a/src/python/futag-package/src/futag/exceptions.py b/src/python/futag-package/src/futag/exceptions.py new file mode 100644 index 00000000..a238c5b6 --- /dev/null +++ b/src/python/futag-package/src/futag/exceptions.py @@ -0,0 +1,33 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + +"""Custom exceptions for the Futag package. + +Provides a hierarchy of exceptions for structured error handling +instead of sys.exit() calls throughout the codebase. +""" + + +class FutagError(Exception): + """Base exception for all Futag errors.""" + + +class InvalidPathError(FutagError): + """Raised when a required file or directory path is invalid or missing.""" + + +class InvalidConfigError(FutagError): + """Raised when configuration parameters are invalid (e.g., bad target type).""" + + +class BuildError(FutagError): + """Raised when library build or analysis fails.""" + + +class GenerationError(FutagError): + """Raised when fuzz target generation fails.""" + + +class AnalysisError(FutagError): + """Raised when analysis result parsing or loading fails.""" diff --git a/src/python/futag-package/src/futag/fdp_generator.py b/src/python/futag-package/src/futag/fdp_generator.py index 2897cd83..0193edda 100644 --- a/src/python/futag-package/src/futag/fdp_generator.py +++ b/src/python/futag-package/src/futag/fdp_generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+ # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -23,36 +27,40 @@ class FuzzDataProviderGenerator(BaseGenerator): def __init__(self, futag_llvm_package, library_root, target_type=LIBFUZZER, json_file=ANALYSIS_FILE_PATH, output_path=FUZZ_DRIVER_PATH, - build_path=BUILD_PATH, install_path=INSTALL_PATH, delimiter="."): + build_path=BUILD_PATH, install_path=INSTALL_PATH, delimiter=".") -> None: super().__init__(futag_llvm_package, library_root, target_type=target_type, json_file=json_file, output_path=output_path, build_path=build_path, install_path=install_path, delimiter=delimiter) - self.last_string_name = "" + self.last_string_name: str = "" @property - def default_headers(self): + def default_headers(self) -> list: + """Return default headers required by FDP-based fuzz targets.""" return ["stdio.h", "stddef.h", "time.h", "stdlib.h", "string.h", "stdint.h", "fuzzer/FuzzedDataProvider.h"] @property - def supports_c(self): + def supports_c(self) -> bool: + """Return whether this generator supports C targets.""" return False # Always C++ only @property - def needs_buffer_check(self): + def needs_buffer_check(self) -> bool: + """Return whether generated harnesses need a buffer size check.""" return False @property - def harness_preamble(self): + def harness_preamble(self) -> str: + """Return preamble code that initializes the FuzzedDataProvider.""" return " FuzzedDataProvider provider(Fuzz_Data, Fuzz_Size);\n" - def _wrapper_file(self, func): - # FDP always generates .cpp files + def _wrapper_file(self, func) -> dict: + """Return wrapper file metadata, forcing .cpp extension for FDP targets.""" self.target_extension = "cpp" return BaseGenerator._wrapper_file(self, func) - def _gen_builtin(self, param_name, gen_type_info): + def _gen_builtin(self, param_name, gen_type_info) -> dict: """Declare and assign value for a builtin type.""" type = gen_type_info["type_name"].replace( 
"(anonymous namespace)::", "") @@ -75,7 +83,8 @@ def _gen_builtin(self, param_name, gen_type_info): "buffer_size": [] } - def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name): + def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name) -> dict: + """Generate a string-size parameter using last consumed string length.""" self.last_string_name return { "gen_lines": [ @@ -87,7 +96,7 @@ def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name): "buffer_size": [] } - def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx): + def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx) -> dict: """Declare and assign value for a C string type.""" gen_lines = [ "//GEN_CSTRING\n", @@ -101,7 +110,7 @@ def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx): "buffer_size": [] } - def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx): + def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx) -> dict: """Declare and assign value for a wide string type.""" ref_name = param_name if (gen_type_info["local_qualifier"]): @@ -125,7 +134,7 @@ def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx): "buffer_size": [] } - def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx): + def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx) -> dict: """Declare and assign value for a C++ string type.""" ref_name = param_name if (gen_type_info["local_qualifier"]): @@ -146,7 +155,7 @@ def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx): "buffer_size": [] } - def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anonymous=False): + def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anonymous=False) -> dict: """Declare and assign value for an enum type.""" if anonymous: enum_name = enum_record["name"] @@ -163,7 +172,7 @@ def _gen_enum(self, 
enum_record, param_name, gen_type_info, compiler_info, anony "buffer_size": ["sizeof(unsigned int)"] } - def _gen_array(self, param_name, gen_type_info): + def _gen_array(self, param_name, gen_type_info) -> dict: """Declare and assign value for an array type.""" return { "gen_lines": [ @@ -186,7 +195,7 @@ def _gen_array(self, param_name, gen_type_info): "buffer_size": [str(gen_type_info["length"]) + " * sizeof(" + gen_type_info["base_type_name"] + ")"] } - def _gen_void(self, param_name): + def _gen_void(self, param_name) -> dict: """Declare and assign value for a void type.""" return { "gen_lines": [ @@ -197,7 +206,7 @@ def _gen_void(self, param_name): "buffer_size": [] } - def _gen_qualifier(self, param_name, prev_param_name, gen_type_info): + def _gen_qualifier(self, param_name, prev_param_name, gen_type_info) -> dict: """Declare and assign value for a qualified type.""" return { "gen_lines": [ @@ -209,7 +218,7 @@ def _gen_qualifier(self, param_name, prev_param_name, gen_type_info): "buffer_size": [] } - def _gen_pointer(self, param_name, prev_param_name, gen_type_info): + def _gen_pointer(self, param_name, prev_param_name, gen_type_info) -> dict: """Declare and assign value for a pointer type.""" return { "gen_lines": [ diff --git a/src/python/futag-package/src/futag/fuzzer.py b/src/python/futag-package/src/futag/fuzzer.py index 730d76d2..16d23a6d 100644 --- a/src/python/futag-package/src/futag/fuzzer.py +++ b/src/python/futag-package/src/futag/fuzzer.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+ # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -19,8 +23,12 @@ from shutil import which from pathlib import Path from subprocess import Popen, PIPE, call, run +import logging + from futag.sysmsg import * +logger = logging.getLogger(__name__) + # Regex patterns for crash log parsing RE_ERROR = r"^==\d*==ERROR: (\w*): (.*) on.*$" RE_LIBFUZZER_ERROR = r"^==\d*== ERROR: (\w*): (.*)$" @@ -50,7 +58,24 @@ class BaseFuzzer: """Base class containing all shared fuzzing logic.""" - def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False, source_path: str = ""): + def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False, source_path: str = "") -> None: + """Initialize the BaseFuzzer with fuzzing configuration. + + Args: + futag_llvm_package: Path to the Futag LLVM package (with binaries, scripts, etc). + fuzz_driver_path: Location of fuzz-drivers, default "futag-fuzz-drivers". + debug: Print debug information while fuzzing, default False. + gdb: Debug crashes with GDB, default False. + svres: Generate svres file for Svace, default False. + fork: Fork mode of libFuzzer, default 1 (no fork mode). + totaltime: Total time of fuzzing one fuzz-driver in seconds, default 300. + timeout: If a fuzz-driver takes longer than this timeout, the process is treated as a failure case, default 10. + memlimit: Memory usage limit in Mb (rss_limit_mb), 0 to disable, default 2048. + coverage: Show coverage of fuzzing, default False. 
+ leak: Detect memory leaks, default False. + introspect: Integrate with fuzz-introspector, default False. + source_path: Path to source code for coverage reports, default "". + """ self.futag_llvm_package = futag_llvm_package self.fuzz_driver_path = fuzz_driver_path self.source_path = source_path @@ -84,17 +109,38 @@ def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_ # will be passed self.backtrace_hashes = set() - def _error_id(self, error_string): + def _error_id(self, error_string: str) -> str: + """Compute a simple numeric ID from an error string by summing character ordinals. + + Args: + error_string: The error description string to convert. + + Returns: + A string representation of the computed numeric ID. + """ error_id = 0 for c in error_string: error_id += ord(c) return str(error_id) - def _printer(self, data): + def _printer(self, data: str) -> None: + """Print data to stdout with carriage return and line clear escape sequence. + + Args: + data: The data to print. + """ sys.stdout.write("\r\x1b[K" + data.__str__()) sys.stdout.flush() - def _xml_escape(self, s): + def _xml_escape(self, s: str) -> str: + """Escape special XML characters and newlines in a string. + + Args: + s: The string to escape. + + Returns: + The XML-escaped string. + """ s = s.replace("&", "&") s = s.replace("<", "<") s = s.replace(">", ">") @@ -102,7 +148,7 @@ def _xml_escape(self, s): s = s.replace("\n", " ") return s - def _get_backtrace_hash(self, backtrace): + def _get_backtrace_hash(self, backtrace: dict) -> int: ''' # Format of backtrace: # backtrace= { @@ -136,16 +182,21 @@ def _get_backtrace_hash(self, backtrace): ) return hash(str(backtrace["warnID"]) + input_str) - def _parse_crash_log(self, crashlog_path): - """Reads crash log, applies regex, returns structured data. + def _parse_crash_log(self, crashlog_path: str) -> tuple: + """Parse a libFuzzer crash log file and extract stack trace information. 
+ + Args: + crashlog_path: Path to the crash log file to parse. Returns: - tuple: (backtrace dict or {}, artifact_file str) + A tuple of (backtrace, artifact_file) where backtrace is a dict + containing structured crash information or an empty dict if no + crash was found, and artifact_file is the path to the crash artifact. """ with open(crashlog_path, "r", errors="ignore") as f: lines = f.readlines() if self.gdb: - print("-- [Futag] crash log:\n", "".join(lines)) + logger.debug("crash log:\n%s", "".join(lines)) backtrace = {} parsing_error = False @@ -231,12 +282,22 @@ def _parse_crash_log(self, crashlog_path): return backtrace, artifact_file - def _run_gdb_debug(self, fuzz_driver, artifact_file, backtrace, tmpdir): - """Creates .gdbinit in tmpdir, runs GDB 3 passes, returns updated backtrace. + def _run_gdb_debug(self, fuzz_driver: str, artifact_file: str, backtrace: dict, tmpdir: str) -> dict: + """Run GDB to collect detailed crash information including types and values. + + Creates a .gdbinit in tmpdir and runs GDB in three passes: + Pass 1: Set breakpoints, output all args/variables. + Pass 2: Get types of args/variables. + Pass 3: Get values. + + Args: + fuzz_driver: Path to the fuzz-driver executable. + artifact_file: Path to the crash artifact file. + backtrace: The backtrace dict from _parse_crash_log to augment. + tmpdir: Temporary directory for GDB init and log files. - Pass 1: Set breakpoints, output all args/variables - Pass 2: Get types of args/variables - Pass 3: Get values + Returns: + The updated backtrace dict with variable info populated. """ gdbinit_path = os.path.join(tmpdir, ".gdbinit") @@ -296,7 +357,7 @@ def _run_gdb_debug(self, fuzz_driver, artifact_file, backtrace, tmpdir): cwd=tmpdir, ) except Exception as e: - print("-- [Futag] Debug with GDB: set breakpoints failed!", e) + logger.error("Debug with GDB: set breakpoints failed! 
%s", e) # --- Pass 2: getting type of args, variables --- count_role_traces = 0 @@ -363,7 +424,7 @@ def _run_gdb_debug(self, fuzz_driver, artifact_file, backtrace, tmpdir): cwd=tmpdir, ) except Exception as e: - print("-- [Futag] Debug with GDB: get types of variables failed!", e) + logger.error("Debug with GDB: get types of variables failed! %s", e) # --- Pass 3: getting values --- count_role_traces = 0 @@ -464,7 +525,7 @@ def _run_gdb_debug(self, fuzz_driver, artifact_file, backtrace, tmpdir): cwd=tmpdir, ) except Exception as e: - print("-- [Futag] Debug with GDB: get values failed!", e) + logger.error("Debug with GDB: get values failed! %s", e) # Read back values into backtrace count_role_traces = 0 @@ -485,8 +546,16 @@ def _run_gdb_debug(self, fuzz_driver, artifact_file, backtrace, tmpdir): return backtrace - def _write_svres(self, backtrace): - """Writes XML svres output for a backtrace, deduplicating by hash.""" + def _write_svres(self, backtrace: dict) -> None: + """Write a single crash backtrace entry to the svres XML file. + + Writes XML svres output for a backtrace, deduplicating by hash. + Appends warning info to warning_info.svres and explanation to + warning_info_ex.svres. + + Args: + backtrace: The structured backtrace dict to write. + """ hash_backtrace = self._get_backtrace_hash(backtrace) if hash_backtrace in self.backtrace_hashes: return @@ -537,13 +606,13 @@ def _write_svres(self, backtrace): + '.comment.statusDefault' ) - def _parse_libfuzzer_log(self, fuzz_driver, libFuzzer_log, gdb=False): + def _parse_libfuzzer_log(self, fuzz_driver: str, libFuzzer_log: str, gdb: bool = False) -> None: """Orchestrator: parse crash log, optionally debug with GDB, write svres. Args: - fuzz_driver (str): path to the fuzz-driver - libFuzzer_log (str): path of libFuzzer log - gdb (bool, optional): option for parsing with GDB. Defaults to False. + fuzz_driver: Path to the fuzz-driver executable. + libFuzzer_log: Path of libFuzzer log file. 
+ gdb: Option for parsing with GDB. Defaults to False. """ backtrace, artifact_file = self._parse_crash_log(libFuzzer_log) @@ -557,11 +626,17 @@ def _parse_libfuzzer_log(self, fuzz_driver, libFuzzer_log, gdb=False): self._write_svres(backtrace) - def _get_corpus_args(self, target_path): + def _get_corpus_args(self, target_path) -> list: """Override in subclass to provide corpus path args.""" return [] - def _build_single_coverage(self, object_file, path): + def _build_single_coverage(self, object_file: str, path: str) -> None: + """Build coverage report for a single fuzz-driver using llvm-profdata and llvm-cov. + + Args: + object_file: Path to the instrumented object file. + path: Directory path for the coverage HTML output. + """ my_env = os.environ.copy() my_env["LLVM_PROFILE_FILE"] = object_file + ".profraw" llvm_profdata = self.futag_llvm_package / "bin/llvm-profdata" @@ -622,11 +697,16 @@ def _build_single_coverage(self, object_file, path): os.rename(path + "/index.html", object_file + ".html") if self.debug: - print(" ".join(llvm_profdata_command)) - print(" ".join(llvm_cov_report)) - print(" ".join(llvm_cov_show)) + logger.debug(" ".join(llvm_profdata_command)) + logger.debug(" ".join(llvm_cov_report)) + logger.debug(" ".join(llvm_cov_show)) + + def _build_overall_coverage(self, path) -> None: + """Build an overall coverage report by merging all profraw files. - def _build_overall_coverage(self, path): + Args: + path: Path object to the fuzz-driver directory containing profraw files. 
+ """ my_env = os.environ.copy() profdata_files = [x.as_posix() for x in path.glob("**/*.profraw") if x.is_file()] object_list = [x.as_posix()[:-8] for x in path.glob("**/*.profraw") if x.is_file()] @@ -683,12 +763,17 @@ def _build_overall_coverage(self, path): ) if self.debug: - print(" ".join(llvm_cov_show)) - print(" ".join(llvm_cov_report)) - print(" ".join(llvm_profdata_command)) + logger.debug(" ".join(llvm_cov_show)) + logger.debug(" ".join(llvm_cov_report)) + logger.debug(" ".join(llvm_profdata_command)) - def _finalize_svres(self): - """Generate svres file from collected warning info.""" + def _finalize_svres(self) -> None: + """Generate the final svres file from the collected warning info. + + Reads warning_info.svres and warning_info_ex.svres, merges them + into the svres template, writes the final futag.svres file, and + removes the intermediate files. + """ template_file = self.futag_llvm_package / "svres-tmpl/svres.tmpl" warning_info_text = "" warning_info_path = Path.cwd().absolute() / "warning_info.svres" @@ -709,14 +794,14 @@ def _finalize_svres(self): warning_info_ex_path.unlink() with open((self.fuzz_driver_path / "futag.svres").as_posix(), "w") as svres: svres.write(lines) - print("-- [Futag] Please import file ", (self.fuzz_driver_path / - "futag.svres").as_posix(), " to Svace project to view result!") + logger.info("Please import file %s to Svace project to view result!", (self.fuzz_driver_path / + "futag.svres").as_posix()) - def fuzz(self, extra_param: str = ""): + def fuzz(self, extra_param: str = "") -> None: """Helper for automatic fuzzing. Args: - extra_param (str, optional): Extra params for fuzzing. Defaults to "". + extra_param: Extra params for fuzzing. Defaults to "". 
""" symbolizer = self.futag_llvm_package / "bin/llvm-symbolizer" generated_functions = [ @@ -726,7 +811,7 @@ def fuzz(self, extra_param: str = ""): fuzz_driver_dirs = [x for x in func_dir.iterdir() if x.is_dir()] for dir in fuzz_driver_dirs: for x in [t for t in dir.glob("*.out") if t.is_file()]: - print("\n-- [Futag] FUZZING driver: " + x.stem + "... \n") + logger.info("FUZZING driver: %s...", x.stem) my_env = os.environ.copy() if not self.leak: my_env["ASAN_OPTIONS"] = "detect_leaks=0" @@ -760,8 +845,7 @@ def fuzz(self, extra_param: str = ""): if extra_param: execute_command = execute_command + extra_param.split(" ") if self.debug: - print("-- [Futag] FUZZING command:" + - " ".join(execute_command)) + logger.debug("FUZZING command: %s", " ".join(execute_command)) call( execute_command, stdout=PIPE, @@ -786,13 +870,11 @@ def fuzz(self, extra_param: str = ""): ) p.communicate() if self.gdb: - print( - "-- [Futag]: Parsing crashes with GDB: ", x.as_posix()) + logger.info("Parsing crashes with GDB: %s", x.as_posix()) self._parse_libfuzzer_log( x.as_posix(), crashlog_filename, True) else: - print( - "-- [Futag]: Parsing crash without GDB: ", x.as_posix()) + logger.info("Parsing crash without GDB: %s", x.as_posix()) self._parse_libfuzzer_log( x.as_posix(), crashlog_filename, False) # build single coverage @@ -805,28 +887,29 @@ def fuzz(self, extra_param: str = ""): # generate svres file self._finalize_svres() - print("============ FINISH ============") + logger.info("============ FINISH ============") class Fuzzer(BaseFuzzer): """Futag Fuzzer""" - def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False, source_path: str = ""): - """_summary_ + def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: 
bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False, source_path: str = "") -> None: + """Initialize the Fuzzer. Args: - futag_llvm_package (str): path to the futag llvm package (with binaries, scripts, etc) - fuzz_driver_path (str, optional): location of fuzz-drivers, default "futag-fuzz-drivers". Defaults to FUZZ_DRIVER_PATH. - debug (bool, optional): print debug infomation while fuzzing, default False. Defaults to False. - gdb (bool, optional): debug crashes with GDB, default False. Defaults to False. - svres (bool, optional): generate svres file for Svace (if you have Svace), default False. Defaults to False. - fork (int, optional): fork mode of libFuzzer (https://llvm.org/docs/LibFuzzer.html#fork-mode). Defaults to 1 - no fork mode. - totaltime (int, optional): total time of fuzzing one fuzz-driver, default 300 seconds. Defaults to 300. - timeout (int, optional): if an fuzz-drive takes longer than this timeout, the process is treated as a failure case. Defaults to 10. - memlimit (int, optional): option for rss_limit_mb of libFuzzer - Memory usage limit in Mb, 0 - disable the limit. Defaults to 2048. - coverage (bool, optional): option for showing coverage of fuzzing. Defaults to False. - leak (bool, optional): detecting memory leak, default False. Defaults to False. - introspect (bool, optional): option for integrate with fuzz-introspector (to be add soon). Defaults to False. + futag_llvm_package: Path to the Futag LLVM package (with binaries, scripts, etc). + fuzz_driver_path: Location of fuzz-drivers, default "futag-fuzz-drivers". + debug: Print debug information while fuzzing, default False. + gdb: Debug crashes with GDB, default False. + svres: Generate svres file for Svace, default False. + fork: Fork mode of libFuzzer, default 1 (no fork mode). 
+ totaltime: Total time of fuzzing one fuzz-driver in seconds, default 300. + timeout: If a fuzz-driver takes longer than this timeout, the process is treated as a failure case, default 10. + memlimit: Memory usage limit in Mb (rss_limit_mb), 0 to disable, default 2048. + coverage: Show coverage of fuzzing, default False. + leak: Detect memory leaks, default False. + introspect: Integrate with fuzz-introspector, default False. + source_path: Path to source code for coverage reports, default "". """ super().__init__( futag_llvm_package=futag_llvm_package, @@ -844,7 +927,7 @@ def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_ source_path=source_path, ) - def _get_corpus_args(self, target_path): + def _get_corpus_args(self, target_path) -> list: """Fuzzer does not add corpus path args.""" return [] @@ -852,22 +935,22 @@ def _get_corpus_args(self, target_path): class NatchFuzzer(BaseFuzzer): """Futag Fuzzer for Natch""" - def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False): - """_summary_ + def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_PATH, debug: bool = False, gdb: bool = False, svres: bool = False, fork: int = 1, totaltime: int = 300, timeout: int = 10, memlimit: int = 2048, coverage: bool = False, leak: bool = False, introspect: bool = False) -> None: + """Initialize the NatchFuzzer. Args: - futag_llvm_package (str): path to the futag llvm package (with binaries, scripts, etc) - fuzz_driver_path (str, optional): location of fuzz-drivers, default "futag-fuzz-drivers". Defaults to FUZZ_DRIVER_PATH. - debug (bool, optional): print debug infomation while fuzzing, default False. Defaults to False. - gdb (bool, optional): debug crashes with GDB, default False. 
Defaults to False. - svres (bool, optional): generate svres file for Svace (if you have Svace), default False. Defaults to False. - fork (int, optional): fork mode of libFuzzer (https://llvm.org/docs/LibFuzzer.html#fork-mode). Defaults to 1 - no fork mode. - totaltime (int, optional): total time of fuzzing one fuzz-driver, default 300 seconds. Defaults to 300. - timeout (int, optional): if an fuzz-drive takes longer than this timeout, the process is treated as a failure case. Defaults to 10. - memlimit (int, optional): option for rss_limit_mb of libFuzzer - Memory usage limit in Mb, 0 - disable the limit. Defaults to 2048. - coverage (bool, optional): option for showing coverage of fuzzing. Defaults to False. - leak (bool, optional): detecting memory leak, default False. Defaults to False. - introspect (bool, optional): option for integrate with fuzz-introspector (to be add soon). Defaults to False. + futag_llvm_package: Path to the Futag LLVM package (with binaries, scripts, etc). + fuzz_driver_path: Location of fuzz-drivers, default "futag-fuzz-drivers". + debug: Print debug information while fuzzing, default False. + gdb: Debug crashes with GDB, default False. + svres: Generate svres file for Svace, default False. + fork: Fork mode of libFuzzer, default 1 (no fork mode). + totaltime: Total time of fuzzing one fuzz-driver in seconds, default 300. + timeout: If a fuzz-driver takes longer than this timeout, the process is treated as a failure case, default 10. + memlimit: Memory usage limit in Mb (rss_limit_mb), 0 to disable, default 2048. + coverage: Show coverage of fuzzing, default False. + leak: Detect memory leaks, default False. + introspect: Integrate with fuzz-introspector, default False. 
""" super().__init__( futag_llvm_package=futag_llvm_package, @@ -885,7 +968,7 @@ def __init__(self, futag_llvm_package: str, fuzz_driver_path: str = FUZZ_DRIVER_ source_path="", ) - def _get_corpus_args(self, target_path): + def _get_corpus_args(self, target_path) -> list: """NatchFuzzer adds corpus path to the execute command.""" corpus_path = (target_path.parents[3] / "Natch_corpus" / target_path.parents[1].stem.replace("anonymous_", "")) return [corpus_path.as_posix()] diff --git a/src/python/futag-package/src/futag/generator.py b/src/python/futag-package/src/futag/generator.py index 1a4e4f20..eac45bf2 100644 --- a/src/python/futag-package/src/futag/generator.py +++ b/src/python/futag-package/src/futag/generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -22,7 +26,6 @@ from subprocess import Popen, PIPE from multiprocessing import Pool from typing import List -from distutils.dir_util import copy_tree from futag.sysmsg import * from futag.preprocessor import * @@ -36,7 +39,7 @@ class Generator(BaseGenerator): def __init__(self, futag_llvm_package, library_root, alter_compiler="", target_type=LIBFUZZER, json_file=ANALYSIS_FILE_PATH, output_path=FUZZ_DRIVER_PATH, build_path=BUILD_PATH, - install_path=INSTALL_PATH, delimiter=".", exclude_headers=None): + install_path=INSTALL_PATH, delimiter=".", exclude_headers=None) -> None: super().__init__(futag_llvm_package, library_root, target_type=target_type, json_file=json_file, output_path=output_path, build_path=build_path, @@ -45,22 +48,26 @@ def __init__(self, futag_llvm_package, library_root, alter_compiler="", self.exclude_headers = exclude_headers if exclude_headers else [] @property - def default_headers(self): + def 
default_headers(self) -> list: + """Return default C headers required by generated fuzz targets.""" return ["stdio.h", "stddef.h", "time.h", "stdlib.h", "string.h", "stdint.h"] @property - def supports_c(self): + def supports_c(self) -> bool: + """Return whether this generator supports C targets.""" return True @property - def needs_buffer_check(self): + def needs_buffer_check(self) -> bool: + """Return whether generated harnesses need a buffer size check.""" return True @property - def harness_preamble(self): + def harness_preamble(self) -> str: + """Return preamble code inserted at the start of the harness body.""" return "" - def _gen_builtin(self, param_name, gen_type_info): + def _gen_builtin(self, param_name, gen_type_info) -> dict: """Declare and assign value for a builtin type.""" return { "gen_lines": [ @@ -73,7 +80,7 @@ def _gen_builtin(self, param_name, gen_type_info): "buffer_size": ["sizeof(" + gen_type_info["type_name"].replace("(anonymous namespace)::", "") + ")"] } - def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name): + def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name) -> dict: """Generate a string-size parameter.""" return { "gen_lines": [ @@ -84,7 +91,7 @@ def _gen_strsize(self, param_name, param_type, dyn_size_idx, array_name): "buffer_size": [] } - def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx): + def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx) -> dict: """Declare and assign value for a C string type.""" ref_name = param_name if gen_type_info["local_qualifier"]: @@ -109,7 +116,7 @@ def _gen_cstring(self, param_name, gen_type_info, dyn_cstring_size_idx): "buffer_size": [] } - def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx): + def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx) -> dict: """Declare and assign value for a wide string type.""" ref_name = param_name if gen_type_info["local_qualifier"]: @@ -134,7 
+141,7 @@ def _gen_wstring(self, param_name, gen_type_info, dyn_wstring_size_idx): "buffer_size": [] } - def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx): + def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx) -> dict: """Declare and assign value for a C++ string type.""" ref_name = param_name if gen_type_info["local_qualifier"]: @@ -148,7 +155,7 @@ def _gen_cxxstring(self, param_name, gen_type_info, dyn_cxxstring_size_idx): "buffer_size": [] } - def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anonymous=False): + def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anonymous=False) -> dict: """Declare and assign value for an enum type.""" enum_name = gen_type_info["type_name"] enum_length = len(enum_record["enum_values"]) @@ -175,7 +182,7 @@ def _gen_enum(self, enum_record, param_name, gen_type_info, compiler_info, anony "buffer_size": ["sizeof(unsigned int)"] } - def _gen_array(self, param_name, gen_type_info): + def _gen_array(self, param_name, gen_type_info) -> dict: """Declare and assign value for an array type.""" return { "gen_lines": [ @@ -193,7 +200,7 @@ def _gen_array(self, param_name, gen_type_info): "buffer_size": [str(gen_type_info["length"]) + " * sizeof(" + gen_type_info["base_type_name"] + ")"] } - def _gen_void(self, param_name): + def _gen_void(self, param_name) -> dict: """Declare and assign value for a void type.""" return { "gen_lines": [ @@ -204,7 +211,7 @@ def _gen_void(self, param_name): "buffer_size": [] } - def _gen_qualifier(self, param_name, prev_param_name, gen_type_info): + def _gen_qualifier(self, param_name, prev_param_name, gen_type_info) -> dict: """Declare and assign value for a qualified type.""" return { "gen_lines": [ @@ -215,7 +222,7 @@ def _gen_qualifier(self, param_name, prev_param_name, gen_type_info): "buffer_size": [] } - def _gen_pointer(self, param_name, prev_param_name, gen_type_info): + def _gen_pointer(self, param_name, 
prev_param_name, gen_type_info) -> dict: """Declare and assign value for a pointer type.""" return { "gen_lines": [ diff --git a/src/python/futag-package/src/futag/generator_state.py b/src/python/futag-package/src/futag/generator_state.py index ad14f724..9126dba8 100644 --- a/src/python/futag-package/src/futag/generator_state.py +++ b/src/python/futag-package/src/futag/generator_state.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + """Generator state management for Futag fuzz target generation. This module provides a dataclass that encapsulates all mutable state used diff --git a/src/python/futag-package/src/futag/natch_generator.py b/src/python/futag-package/src/futag/natch_generator.py index 01c770ae..3d8600e5 100644 --- a/src/python/futag-package/src/futag/natch_generator.py +++ b/src/python/futag-package/src/futag/natch_generator.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -15,6 +19,7 @@ """Futag NatchGenerator - Fuzz target generation using Natch runtime data.""" import json +import logging import pathlib import os import sys @@ -22,6 +27,8 @@ from futag.generator import Generator from futag.sysmsg import * +logger = logging.getLogger(__name__) + class NatchGenerator(Generator): """Futag Generator for Natch. 
@@ -70,8 +77,9 @@ def __init__(self, futag_llvm_package: str, library_root: str, def parse_values(self): """Parse Natch JSON and generate seed corpus files.""" - print(self.Natch_corpus_path.as_posix()) - natch_values = json.load(open(self.natch_json_file.as_posix())) + logger.info(self.Natch_corpus_path.as_posix()) + with open(self.natch_json_file.as_posix()) as f: + natch_values = json.load(f) if not natch_values: raise ValueError(COULD_NOT_PARSE_NATCH_CALLSTACK) function_name_list = set() @@ -90,10 +98,10 @@ def parse_values(self): index += 1 blob_name = "blob" + str(index) arguments = [] - print("-- Parsing data of function " + function["Function name"]) + logger.info("Parsing data of function %s", function["Function name"]) with open((self.Natch_corpus_path / function["Function name"] / blob_name).as_posix(), "wb") as f: - print(" [*] writing seed file: " + (self.Natch_corpus_path / - function["Function name"] / blob_name).as_posix() + "...") + logger.info(" [*] writing seed file: %s...", (self.Natch_corpus_path / + function["Function name"] / blob_name).as_posix()) for arg in function["Arguments"]: arguments.append(arg["Type"]) if (arg["Type"] in ["char *", "const char *", "unsigned char *", "const unsigned char *", "const char *&"]): @@ -389,7 +397,7 @@ def _gen_target_function(self, func, param_id) -> bool: if (not len(self.state.buffer_size) and not self.state.dyn_cstring_size_idx and not self.state.dyn_cxxstring_size_idx and not self.state.dyn_wstring_size_idx and not self.state.file_idx) or not self.state.gen_this_function: log = self._log_file(func, self.gen_anonymous) if not log: - print(CANNOT_CREATE_LOG_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_LOG_FILE} {func['qname']}") else: self.state.curr_func_log = f"Log for function: {func['qname']}\n{self.state.curr_func_log}" log.write(self.state.curr_func_log) @@ -397,9 +405,9 @@ def _gen_target_function(self, func, param_id) -> bool: return False # generate file name wrapper_result = 
self._wrapper_file(func) - print("Generating fuzzing-wapper for function ", - func["qname"], ": ") - print("-- ", wrapper_result["msg"]) + logger.info("Generating fuzzing-wapper for function %s:", + func["qname"]) + logger.info("-- %s", wrapper_result["msg"]) if not wrapper_result["file"]: self.state.gen_this_function = False return False @@ -767,7 +775,7 @@ def _gen_anonymous_function(self, func, param_id) -> bool: if (not len(self.state.buffer_size) and not self.state.dyn_cstring_size_idx and not self.state.dyn_cxxstring_size_idx and not self.state.dyn_wstring_size_idx and not self.state.file_idx) or not self.state.gen_this_function: log = self._log_file(func, self.gen_anonymous) if not log: - print(CANNOT_CREATE_LOG_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_LOG_FILE} {func['qname']}") else: self.state.curr_func_log = f"Log for function: {func['qname']}\n{self.state.curr_func_log}" log.write(self.state.curr_func_log) @@ -777,9 +785,9 @@ def _gen_anonymous_function(self, func, param_id) -> bool: f = self._anonymous_wrapper_file(func) if not f: self.state.gen_this_function = False - print(CANNOT_CREATE_WRAPPER_FILE, func["qname"]) + logger.error(f"{CANNOT_CREATE_WRAPPER_FILE} {func['qname']}") return False - print(WRAPPER_FILE_CREATED, f.name) + logger.info(f"{WRAPPER_FILE_CREATED} {f.name}") for line in self._gen_header(func["location"]["fullpath"]): f.write("// " + line) @@ -896,7 +904,7 @@ def _gen_anonymous_function(self, func, param_id) -> bool: self._append_gen_dict(curr_gen) if gen_type_info["gen_type"] == GEN_REFSTRING: - print("!!!GEN_REFSTRING\n\n\n") + logger.debug("!!!GEN_REFSTRING") # GEN FILE NAME OR # GEN STRING if (curr_param["param_usage"] in ["FILE_PATH_READ", "FILE_PATH_WRITE", "FILE_PATH_RW", "FILE_PATH"] or curr_param["param_name"] in ["filename", "file", "filepath"] or curr_param["param_name"].find('file') != -1 or curr_param["param_name"].find('File') != -1) and len(curr_param["gen_list"]) == 1: curr_name = "f_" + curr_name # 
string_prefix @@ -1152,8 +1160,7 @@ def gen_targets(self, anonymous=False, max_wrappers=10): continue # For C if func["access_type"] == AS_NONE and func["fuzz_it"] and func["storage_class"] < 2 and (func["parent_hash"] == ""): - print( - "-- [Futag] Try to generate fuzz-driver for function: ", func["name"], "...") + logger.info("Try to generate fuzz-driver for function: %s...", func["name"]) C_generated_function.append(func["name"]) self.state.gen_this_function = True self.state.header = [] @@ -1176,8 +1183,7 @@ def gen_targets(self, anonymous=False, max_wrappers=10): # For C++, Declare object of class and then call the method if func["access_type"] == AS_PUBLIC and func["fuzz_it"] and func["func_type"] in [FUNC_CXXMETHOD, FUNC_CONSTRUCTOR, FUNC_DEFAULT_CONSTRUCTOR, FUNC_GLOBAL, FUNC_STATIC] and (not "::operator" in func["qname"]): Cplusplus_usual_class_method.append(func["qname"]) - print( - "-- [Futag] Try to generate fuzz-driver for class method: ", func["name"], "...") + logger.info("Try to generate fuzz-driver for class method: %s...", func["name"]) self.state.gen_this_function = True self.state.header = [] self.state.buffer_size = [] @@ -1227,8 +1233,8 @@ def gen_targets(self, anonymous=False, max_wrappers=10): "Cplusplus_anonymous_class_methods": Cplusplus_anonymous_class_method, "C_unknown_functions": C_unknown_function } - json.dump(self.result_report, open( - (self.build_path / "result-report.json").as_posix(), "w")) + with open((self.build_path / "result-report.json").as_posix(), "w") as f: + json.dump(self.result_report, f) def gen_targets_from_callstack(self, target): """Generate fuzz targets from a specific Natch callstack entry. 
diff --git a/src/python/futag-package/src/futag/preprocessor.py b/src/python/futag-package/src/futag/preprocessor.py index 4ed7a74c..08992775 100644 --- a/src/python/futag-package/src/futag/preprocessor.py +++ b/src/python/futag-package/src/futag/preprocessor.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. + # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** @@ -21,9 +25,13 @@ import shlex import sys +import logging + from futag.sysmsg import * from subprocess import Popen, PIPE +logger = logging.getLogger(__name__) + def delete_folder(pth): """ @@ -61,20 +69,20 @@ def _run_command(cmd, env=None, msg_prefix="", fail_msg="", succeed_msg="", kwargs.update(stdout=PIPE, stderr=PIPE) p = Popen(cmd, **kwargs) if msg_prefix: - print(msg_prefix, " ".join(p.args)) + logger.debug("%s %s", msg_prefix, " ".join(p.args)) output, errors = p.communicate() if p.returncode: if errors: - print(errors) + logger.error(errors) if exit_on_fail and fail_msg: sys.exit(fail_msg) elif fail_msg: - print(fail_msg) + logger.error(fail_msg) else: if output and capture: - print(output) + logger.debug(output) if succeed_msg: - print(succeed_msg) + logger.info(succeed_msg) return p.returncode, output, errors @@ -99,12 +107,12 @@ def _load_json_files(file_list, description=""): with open(jf, "r") as f: data = json.load(f) except JSONDecodeError: - print(f" -- [Futag]: Warning: Could not parse JSON in {jf}") + logger.warning(f"Could not parse JSON in {jf}") continue if data is None: - print(f" -- [Futag]: Warning: loading json from file {jf} failed!") + logger.warning(f"loading json from file {jf} failed!") continue - print(f" -- [Futag]: Analyzing {description} in file {jf} ...") + logger.info(f"Analyzing {description} in file {jf} ...") yield data @@ -230,30 +238,30 @@ 
def auto_build(self) -> bool: bool: result of auto build. """ - print(AUTO_BUILD_MSG) + logger.info(AUTO_BUILD_MSG) if (self.library_root / "configure").exists(): - print(CONFIGURE_FOUND) + logger.info(CONFIGURE_FOUND) self.build_configure() return True # TODO: добавить возможность указать папку cmake!!! if (self.library_root / "CMakeLists.txt").exists(): - print(CMAKE_FOUND) + logger.info(CMAKE_FOUND) self.build_cmake() return True if (self.library_root / "Makefile").exists(): - print(MAKEFILE_FOUND) + logger.info(MAKEFILE_FOUND) self.build_makefile() return True if (self.library_root / "meson.build").exists(): - print(CMAKE_FOUND) + logger.info(CMAKE_FOUND) self.build_meson() return True - print(AUTO_BUILD_FAILED) + logger.error(AUTO_BUILD_FAILED) return False def build_meson(self) -> bool: @@ -267,7 +275,7 @@ def build_meson(self) -> bool: # Configure with meson os.chdir(self.library_root.as_posix()) my_env = self._make_env() - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) if self.build_path.resolve() == self.library_root.resolve(): sys.exit(CMAKE_PATH_ERROR) @@ -321,7 +329,7 @@ def build_cmake(self) -> bool: # Config with cmake my_env = self._make_env() - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) if self.build_path.resolve() == self.library_root.resolve(): sys.exit(CMAKE_PATH_ERROR) @@ -418,7 +426,7 @@ def build_configure(self) -> bool: curr_dir = os.getcwd() os.chdir(self.build_path.as_posix()) - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) config_cmd = self._scan_build_args() + [ (self.library_root / "configure").as_posix(), f"--prefix=" + self.install_path.as_posix(), @@ -487,7 +495,7 @@ def build_makefile(self) -> bool: """ curr_dir = os.getcwd() - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) # Analyzing the library analysis_command = self._scan_build_args( @@ -568,15 +576,14 @@ def analyze(self): record_list = [] compiled_files = [] - print("") - print(" -- [Futag]: Analysing 
function declarations...") + logger.info("Analysing function declarations...") for functions in _load_json_files(decl_files, "function declarations"): # get global hash of all functions for func_hash in functions: if func_hash not in function_list: function_list[func_hash] = functions[func_hash] - print(" -- [Futag]: Analysing contexts...") + logger.info("Analysing contexts...") for contexts in _load_json_files(context_files, "context"): # get global hash of all functions global_hash = [x for x in function_list] @@ -592,10 +599,9 @@ def analyze(self): function_list[func_hash]["call_contexts"].append( call_xref) else: - print(" -- %s not found in global hash list!" % (func_hash)) + logger.warning("%s not found in global hash list!", func_hash) - print("") - print(" -- [Futag]: Analysing data types ...") + logger.info("Analysing data types ...") for types in _load_json_files(typeinfo_files, "data types"): for enum_it in types["enums"]: @@ -627,8 +633,7 @@ def analyze(self): if not exist: typedef_list.append(typedef_it) - print("") - print(" -- [Futag]: Analysing header files and compiler options...") + logger.info("Analysing header files and compiler options...") match_include = r"^\s*#include\s*([<\"][//_\-\w.<>]+[>\"])\s*$" for infofile in info_files: @@ -638,18 +643,17 @@ def analyze(self): with open(infofile, "r") as f: compiled_file = json.load(f) except JSONDecodeError: - print(f" -- [Futag]: Warning: Could not parse JSON in {infofile}") + logger.warning(f"Could not parse JSON in {infofile}") continue if not compiled_file or not compiled_file['file']: - print(f" -- [Futag]: Warning: loading json from file {infofile} failed!") + logger.warning(f"loading json from file {infofile} failed!") continue else: - print(f" -- [Futag]: Analyzing headers in file {infofile} ...") + logger.info(f"Analyzing headers in file {infofile} ...") code = [] if os.path.exists(compiled_file['file']): - print(" -- [Futag]: Getting info from file %s ..." 
% - (compiled_file['file'])) + logger.info("Getting info from file %s ...", compiled_file['file']) with open(compiled_file['file'], "r", errors="ignore") as f: code = f.readlines() headers = [] @@ -728,12 +732,12 @@ def analyze(self): with open(self.analysis_path / "futag-4consumer.json", "w") as f: json.dump(result_4_consumer, f) - print("Total functions: ", str(len(result["functions"]))) - print("Total functions for consumer programs: ", str(len(result_4_consumer["functions"]))) - print("Total enums: ", str(len(result["enums"]))) - print("Total records: ", str(len(result["records"]))) - print("Total typedefs: ", str(len(result["typedefs"]))) - print("Analysis result: ", (self.analysis_path / + logger.info("Total functions: %s", str(len(result["functions"]))) + logger.info("Total functions for consumer programs: %s", str(len(result_4_consumer["functions"]))) + logger.info("Total enums: %s", str(len(result["enums"]))) + logger.info("Total records: %s", str(len(result["records"]))) + logger.info("Total typedefs: %s", str(len(result["typedefs"]))) + logger.info("Analysis result: %s", (self.analysis_path / "futag-analysis-result.json").as_posix()) @@ -803,28 +807,27 @@ def auto_build(self) -> bool: bool: result of auto build. """ - print(AUTO_CONSUMER_BUILD_MSG) - print("-- [Futag]: Testing library: ", self.library_root.as_posix()) - print("-- [Futag]: Consumer program: ", self.consumer_root.as_posix()) - print("-- [Futag]: Analysis result: ", self.consumer_report_path.as_posix()) - print("") + logger.info(AUTO_CONSUMER_BUILD_MSG) + logger.info("Testing library: %s", self.library_root.as_posix()) + logger.info("Consumer program: %s", self.consumer_root.as_posix()) + logger.info("Analysis result: %s", self.consumer_report_path.as_posix()) if (self.consumer_root / "configure").exists(): - print(CONFIGURE_FOUND) + logger.info(CONFIGURE_FOUND) self.build_configure() return True # TODO: добавить возможность указать папку cmake!!! 
if (self.consumer_root / "CMakeLists.txt").exists(): - print(CMAKE_FOUND) + logger.info(CMAKE_FOUND) self.build_cmake() return True if (self.consumer_root / "Makefile").exists(): - print(MAKEFILE_FOUND) + logger.info(MAKEFILE_FOUND) self.build_makefile() return True - print(AUTO_BUILD_FAILED) + logger.error(AUTO_BUILD_FAILED) return False def build_cmake(self) -> bool: @@ -842,7 +845,7 @@ def build_cmake(self) -> bool: # Config with cmake my_env = self._make_env() - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) if self.build_path.resolve() == self.consumer_root.resolve(): sys.exit(CMAKE_PATH_ERROR) @@ -890,7 +893,7 @@ def build_configure(self) -> bool: curr_dir = os.getcwd() os.chdir(self.consumer_root.as_posix()) - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) config_cmd = self._scan_build_args() + [ (self.consumer_root / "configure").as_posix(), @@ -926,7 +929,7 @@ def build_makefile(self) -> bool: bool: result of building with Makefile. """ - print(LIB_ANALYSIS_STARTED) + logger.info(LIB_ANALYSIS_STARTED) # Analyzing the library analysis_command = self._scan_build_args( diff --git a/src/python/futag-package/src/futag/sysmsg.py b/src/python/futag-package/src/futag/sysmsg.py index 4879ab71..95918d4b 100644 --- a/src/python/futag-package/src/futag/sysmsg.py +++ b/src/python/futag-package/src/futag/sysmsg.py @@ -1,3 +1,7 @@ +# Copyright (c) 2023-2024 ISP RAS (https://www.ispras.ru) +# Licensed under the GNU General Public License v3.0 +# See LICENSE file in the project root for full license text. 
+ # ************************************************** # ** ______ __ __ ______ ___ ______ ** # ** / ____/ / / / / /_ __/ / | / ____/ ** diff --git a/src/python/futag-package/tests/test_exceptions.py b/src/python/futag-package/tests/test_exceptions.py new file mode 100644 index 00000000..833abfeb --- /dev/null +++ b/src/python/futag-package/tests/test_exceptions.py @@ -0,0 +1,33 @@ +"""Tests for the Futag exception hierarchy.""" +import sys +import os +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from futag.exceptions import ( + FutagError, InvalidPathError, InvalidConfigError, + BuildError, GenerationError, AnalysisError +) + + +class TestExceptionHierarchy: + def test_all_inherit_from_futag_error(self): + assert issubclass(InvalidPathError, FutagError) + assert issubclass(InvalidConfigError, FutagError) + assert issubclass(BuildError, FutagError) + assert issubclass(GenerationError, FutagError) + assert issubclass(AnalysisError, FutagError) + + def test_futag_error_is_exception(self): + assert issubclass(FutagError, Exception) + + def test_can_catch_with_base(self): + with pytest.raises(FutagError): + raise InvalidPathError("test path") + + def test_message_preserved(self): + try: + raise AnalysisError("bad json") + except FutagError as e: + assert "bad json" in str(e) diff --git a/src/python/futag-package/tests/test_fdp_generator.py b/src/python/futag-package/tests/test_fdp_generator.py new file mode 100644 index 00000000..47b88444 --- /dev/null +++ b/src/python/futag-package/tests/test_fdp_generator.py @@ -0,0 +1,49 @@ +"""Tests for the FuzzDataProviderGenerator class.""" +import sys +import os +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from futag.fdp_generator import FuzzDataProviderGenerator + + +@pytest.fixture +def fdp_generator(tmp_futag_package, tmp_library_root): + return FuzzDataProviderGenerator(tmp_futag_package, tmp_library_root) + + +class 
TestFDPProperties: + def test_supports_c_false(self, fdp_generator): + assert fdp_generator.supports_c is False + + def test_needs_buffer_check_false(self, fdp_generator): + assert fdp_generator.needs_buffer_check is False + + def test_default_headers_includes_fdp(self, fdp_generator): + assert "fuzzer/FuzzedDataProvider.h" in fdp_generator.default_headers + + def test_harness_preamble_has_provider(self, fdp_generator): + assert "FuzzedDataProvider" in fdp_generator.harness_preamble + + +class TestFDPGenBuiltin: + def test_int_uses_consume_integral(self, fdp_generator): + result = fdp_generator._gen_builtin("x", {"type_name": "int"}) + assert any("ConsumeIntegral" in line for line in result["gen_lines"]) + assert result["buffer_size"] == [] + + def test_float_uses_consume_floating_point(self, fdp_generator): + result = fdp_generator._gen_builtin("x", {"type_name": "float"}) + assert any("ConsumeFloatingPoint" in line for line in result["gen_lines"]) + + def test_double_uses_consume_floating_point(self, fdp_generator): + result = fdp_generator._gen_builtin("x", {"type_name": "double"}) + assert any("ConsumeFloatingPoint" in line for line in result["gen_lines"]) + + +class TestFDPGenCstring: + def test_uses_consume_random_length_string(self, fdp_generator): + result = fdp_generator._gen_cstring("s", {"type_name": "const char *", "local_qualifier": ""}, 1) + assert any("ConsumeRandomLengthString" in line for line in result["gen_lines"]) + assert result["gen_free"] == [] diff --git a/src/python/futag-package/tests/test_fuzzer.py b/src/python/futag-package/tests/test_fuzzer.py new file mode 100644 index 00000000..55a2bf4b --- /dev/null +++ b/src/python/futag-package/tests/test_fuzzer.py @@ -0,0 +1,58 @@ +"""Tests for the Fuzzer module.""" +import sys +import os +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from futag.fuzzer import BaseFuzzer, Fuzzer, NatchFuzzer + + +class TestFuzzerClassHierarchy: + def 
test_fuzzer_is_base_fuzzer(self): + assert issubclass(Fuzzer, BaseFuzzer) + + def test_natch_fuzzer_is_base_fuzzer(self): + assert issubclass(NatchFuzzer, BaseFuzzer) + + +class TestErrorId: + def test_deterministic(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + result1 = fuzzer._error_id("test error") + result2 = fuzzer._error_id("test error") + assert result1 == result2 + + def test_different_inputs_different_ids(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + result1 = fuzzer._error_id("error A") + result2 = fuzzer._error_id("error B") + assert result1 != result2 + + +class TestXmlEscape: + def test_ampersand(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + assert "&amp;" in fuzzer._xml_escape("a & b") + + def test_less_than(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + assert "&lt;" in fuzzer._xml_escape("a < b") + + def test_greater_than(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + assert "&gt;" in fuzzer._xml_escape("a > b") + + def test_quote(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + assert "&quot;" in fuzzer._xml_escape('a "b" c') + + def test_newline(self): + fuzzer = BaseFuzzer.__new__(BaseFuzzer) + assert "a&#10;b" == fuzzer._xml_escape("a\nb") + + +class TestCorpusArgs: + def test_fuzzer_returns_empty(self): + fuzzer = Fuzzer.__new__(Fuzzer) + assert fuzzer._get_corpus_args(None) == [] diff --git a/src/python/futag-package/tests/test_generator.py b/src/python/futag-package/tests/test_generator.py new file mode 100644 index 00000000..a8c3257f --- /dev/null +++ b/src/python/futag-package/tests/test_generator.py @@ -0,0 +1,104 @@ +"""Tests for the Generator class type generation methods.""" +import sys +import os +import json +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + +from futag.generator import Generator + + +@pytest.fixture +def generator(tmp_futag_package, tmp_library_root): + """Create a Generator instance with mock paths.""" + return Generator(tmp_futag_package, tmp_library_root) + + +class
TestGenBuiltin: + def test_int_type(self, generator): + result = generator._gen_builtin("x", {"type_name": "int"}) + assert "gen_lines" in result + assert "gen_free" in result + assert "buffer_size" in result + assert any("memcpy" in line for line in result["gen_lines"]) + assert any("sizeof(int)" in line for line in result["gen_lines"]) + assert result["gen_free"] == [] + assert "sizeof(int)" in result["buffer_size"][0] + + def test_anonymous_namespace_stripped(self, generator): + result = generator._gen_builtin("x", {"type_name": "(anonymous namespace)::MyType"}) + assert not any("(anonymous namespace)" in line for line in result["gen_lines"]) + + def test_float_type(self, generator): + result = generator._gen_builtin("x", {"type_name": "float"}) + assert any("sizeof(float)" in s for s in result["buffer_size"]) + + +class TestGenCstring: + def test_basic_cstring(self, generator): + result = generator._gen_cstring("s", { + "base_type_name": "char *", + "type_name": "char *", + "local_qualifier": "" + }, 1) + assert any("malloc" in line for line in result["gen_lines"]) + assert any("memcpy" in line for line in result["gen_lines"]) + assert any("free" in line for line in result["gen_free"]) + + def test_const_cstring(self, generator): + result = generator._gen_cstring("s", { + "base_type_name": "char *", + "type_name": "const char *", + "local_qualifier": "const" + }, 1) + # Should have a reference variable + assert any("rs" in line for line in result["gen_lines"]) + + +class TestGenEnum: + def test_c_enum(self, generator): + enum_record = {"name": "Color", "qname": "Color", "enum_values": [ + {"field_name": "RED", "value": 0}, + {"field_name": "GREEN", "value": 1}, + ]} + result = generator._gen_enum(enum_record, "e", {"type_name": "Color"}, {"compiler": "CC"}) + assert any("enum_index" in line for line in result["gen_lines"]) + assert any("% 2" in line for line in result["gen_lines"]) + + def test_cxx_enum(self, generator): + enum_record = {"name": "Color", 
"qname": "Color", "enum_values": [ + {"field_name": "RED", "value": 0}, + ]} + result = generator._gen_enum(enum_record, "e", {"type_name": "Color"}, {"compiler": "CXX"}) + assert any("static_cast" in line for line in result["gen_lines"]) + + +class TestGenArray: + def test_basic_array(self, generator): + result = generator._gen_array("arr", { + "type_name": "int *", "base_type_name": "int", "length": 10 + }) + assert any("malloc" in line for line in result["gen_lines"]) + assert any("10" in line for line in result["gen_lines"]) + assert any("free" in line for line in result["gen_free"]) + + +class TestGenVoid: + def test_void_pointer(self, generator): + result = generator._gen_void("v") + assert any("NULL" in line for line in result["gen_lines"]) + assert result["gen_free"] == [] + assert result["buffer_size"] == [] + + +class TestGenQualifier: + def test_qualifier(self, generator): + result = generator._gen_qualifier("q_x", "x", {"type_name": "const int"}) + assert any("q_x" in line and "x" in line for line in result["gen_lines"]) + + +class TestGenPointer: + def test_pointer(self, generator): + result = generator._gen_pointer("p_x", "x", {"type_name": "int *"}) + assert any("& x" in line for line in result["gen_lines"]) diff --git a/src/python/futag-package/tests/test_preprocessor.py b/src/python/futag-package/tests/test_preprocessor.py new file mode 100644 index 00000000..964fd6bd --- /dev/null +++ b/src/python/futag-package/tests/test_preprocessor.py @@ -0,0 +1,62 @@ +"""Tests for the preprocessor module.""" +import sys +import os +import json +import pathlib +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "src")) + + +class TestLoadJsonFiles: + """Test the _load_json_files helper function.""" + + def test_skips_empty_files(self, tmp_path): + """Empty JSON files should be skipped.""" + from futag.preprocessor import _load_json_files + empty_file = tmp_path / "empty.json" + empty_file.write_text("") + results = 
list(_load_json_files([empty_file], "test")) + assert results == [] + + def test_warns_on_bad_json(self, tmp_path, capsys): + """Malformed JSON should print warning and continue.""" + from futag.preprocessor import _load_json_files + bad_file = tmp_path / "bad.json" + bad_file.write_text("{invalid json") + results = list(_load_json_files([bad_file], "test")) + assert results == [] + + def test_yields_valid_json(self, tmp_path): + """Valid JSON files should be yielded.""" + from futag.preprocessor import _load_json_files + good_file = tmp_path / "good.json" + good_file.write_text('{"key": "value"}') + results = list(_load_json_files([good_file], "test")) + assert len(results) == 1 + assert results[0] == {"key": "value"} + + def test_skips_none_json(self, tmp_path): + """JSON files containing null should be skipped.""" + from futag.preprocessor import _load_json_files + null_file = tmp_path / "null.json" + null_file.write_text("null") + results = list(_load_json_files([null_file], "test")) + assert results == [] + + +class TestParseLocation: + """Test the _parse_location helper.""" + + def test_basic_path(self): + from futag.preprocessor import _parse_location + result = _parse_location("/src/test.c:42") + assert result["file"] == "test.c" + assert result["line"] == "42" + assert result["directory"] == "/src" + assert result["fullpath"] == "/src/test.c" + + def test_path_with_colon(self): + from futag.preprocessor import _parse_location + result = _parse_location("/src/C:/test.c:10") + assert result["line"] == "10" diff --git a/src/python/template-script.py b/src/python/template-script.py index 9394d2d7..89bfe6d8 100644 --- a/src/python/template-script.py +++ b/src/python/template-script.py @@ -1,17 +1,27 @@ #!/usr/bin/env python3 +"""Futag template script demonstrating three usage patterns. + +Pattern 1: Basic library analysis and fuzz target generation. +Pattern 2: Consumer program analysis for context extraction.
+Pattern 3: Context-based fuzz target generation from consumer usage. +""" from futag.preprocessor import * -from futag.generator import * +from futag.generator import * + +# ============================================================================= +# Pattern 1: Build, analyze, and generate fuzz targets for a library +# ============================================================================= test_build = Builder( -"../futag-llvm", #Путь к рабочей директории futag -"../json-c", #Путь к директории исходных текстов исследуемого приложения -flags="-g -O0", #Флаги при сборке -clean=True, #Очистить каталоги futag-build, futag-install, futag-analysis перед запуском, допустимые значение: (True/False)(Необязательный параметр, по-умолчанию False) -build_path="../json-c/futag-build", #Путь к директории futag-build (Необязательный параметр) -install_path="../json-c/futag-install", #Путь к директории futag-install (Необязательный параметр) -analysis_path="../json-c/futag-analysis", #Путь к директории futag-analysis (Необязательный параметр) -processes=4, #Колличество ядер процессора задействующихся при сборке (Необязательный параметр) -build_ex_params="--disable-zip" #Дополнительные параметры компилятора (Необязательный параметр) + "../futag-llvm", # Path to the futag-llvm working directory + "../json-c", # Path to the library source directory + flags="-g -O0", # Compiler flags for building + clean=True, # Clean futag-build/install/analysis dirs before running (default: False) + build_path="../json-c/futag-build", # Path to build directory (optional) + install_path="../json-c/futag-install", # Path to install directory (optional) + analysis_path="../json-c/futag-analysis", # Path to analysis directory (optional) + processes=4, # Number of CPU cores for building (optional) + build_ex_params="--disable-zip" # Extra build parameters (optional) ) test_build.auto_build() @@ -23,31 +33,38 @@ ) generator.gen_targets() generator.compile_targets( - 
workers=4, keep_failed=True ) +# ============================================================================= +# Pattern 2: Analyze a consumer program to extract library usage contexts +# ============================================================================= FUTAG_PATH = "/home/futag/Futag/futag-llvm" library_root = "json-c-json-c-0.16-20220414" consumer_root = "libstorj-1.0.3" -consumber_builder = ConsumerBuilder( - FUTAG_PATH, # путь к директории "futag-llvm" - library_root, # путь к директории содержащей исходные кода тестируемой библиотеки - consumer_root, # путь к директории содержащей исходные кода потребительской программы - # clean=True, - # processes=16, +consumer_builder = ConsumerBuilder( + FUTAG_PATH, # Path to the futag-llvm directory + library_root, # Path to the library source directory + consumer_root, # Path to the consumer program source directory + # clean=True, + # processes=16, ) -consumber_builder.auto_build() -consumber_builder.analyze() +consumer_builder.auto_build() +consumer_builder.analyze() + +# ============================================================================= +# Pattern 3: Generate fuzz targets from consumer usage contexts +# ============================================================================= context_generator = ContextGenerator( - FUTAG_PATH, - library_root, + FUTAG_PATH, + library_root, ) -context_generator.gen_context() # генерация фаззинг-оберток для контекстов -context_generator.compile_targets( #компиляция сгенерированных фаззинг-оберток +context_generator.gen_context() # Generate fuzz wrappers for contexts +context_generator.compile_targets( # Compile the generated fuzz wrappers keep_failed=True, )