Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[run]
omit =
src/hario_core/interfaces.py
src/hario_core/**/interfaces.py

[report]
exclude_lines =
Expand Down
4 changes: 1 addition & 3 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
[flake8]
max-line-length = 88
extend-ignore = E203
exclude =
tests/samples.py
extend-ignore = E203
7 changes: 3 additions & 4 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,10 @@ jobs:
python -m pip install --upgrade pip
pip install .[dev]

- name: Lint with flake8, black, mypy
- name: Lint with pre-commit
run: |
flake8 .
black --check .
mypy -p hario_core
pre-commit run --all-files

- name: Test with pytest
run: |
pytest --cov --cov-branch --cov-report=xml
Expand Down
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ __pycache__/
*.py[cod]
*$py.class

# Benchmarks
benchmarks/*.har
benchmarks/*.stats
benchmarks/*.csv

# C extensions
*.so

Expand Down
9 changes: 8 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,27 @@ repos:
rev: 24.3.0
hooks:
- id: black
files: ^src/|^tests/
exclude: ^tests/samples.py$
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
files: ^src/|^tests/
exclude: ^tests/samples.py$
- repo: https://github.com/pycqa/flake8
rev: 6.0.0
hooks:
- id: flake8
files: ^src/|^tests/
exclude: ^tests/samples.py$
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.7.1
hooks:
- id: mypy
files: ^src/|^tests/
exclude: ^tests/samples.py$
args: ["--config-file=pyproject.toml"]
exclude: ^(docs|examples)/
additional_dependencies:
- pydantic

65 changes: 49 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ A modern, extensible, and type-safe Python library for parsing, transforming, an
## Features

- **Type-Safe Parsing**: Validates HAR files using Pydantic models, catching errors early.
- **Transformers**: Apply built-in or custom transformations to each HAR entry (e.g., flattening, normalization).
- **Transformers**: Apply built-in or custom transformations to each HAR entry (e.g., flattening, normalization).
- **Normalization**: Ensures all numeric fields (sizes, timings) are non-negative, so you can safely sum, aggregate, and analyze data without errors from negative values. This is crucial for analytics and reporting.
- **Deterministic & Random IDs**: Generate unique or deterministic IDs for each entry. Deterministic IDs ensure that the same request always gets the same ID—useful for deduplication, comparison, and building analytics pipelines.
- **Extensible**: Register your own entry models to support browser-specific or proprietary HAR extensions (e.g., Chrome DevTools, Safari).
Expand All @@ -24,32 +24,65 @@ pip install hario-core

## Quickstart

### 1. Parse and validate a HAR file

```python
from hario_core import parse

har_log = parse("example.har")
entries = har_log.model_dump()["entries"] # list of dicts
```

### 2. Transform entries with a pipeline

```python
from hario_core.transform import Pipeline, flatten, set_id, by_field

pipeline = Pipeline([
set_id(by_field(["request.url", "startedDateTime"]))
])
results = pipeline.process(entries)
```

### 3. Custom entry models (extensions)

```python
from hario_core import parse, Pipeline, by_field, normalize_sizes, flatten
from hario_core.parse import register_entry_model
from hario_core.models import Entry

# Build a processing pipeline: deterministic ID, normalization, flattening
pipeline = Pipeline(
id_fn=by_field(["request.url", "startedDateTime"]),
transformers=[normalize_sizes(), flatten()],
)
def is_custom_entry(entry: dict) -> bool:
return "x-custom" in entry

# Parse your HAR file (from path, bytes, or file-like object)
model = parse("example.har")
result_dict = pipeline.process(model)
class CustomEntry(Entry):
x_custom: str

for entry in result_dict:
print(entry["id"], entry["request"]["url"])
register_entry_model(is_custom_entry, CustomEntry)
```

## Public API

### Parsing and validation
- `parse(path_or_bytes_or_filelike) -> HarLog`
- `validate(har_dict: dict) -> HarLog`
- `register_entry_model(detector: Callable, model: Type[Entry])`
- `entry_selector(entry_dict: dict) -> Type[Entry]`

### Models
- `Entry`, `HarLog`, `DevToolsEntry` (and all standard HAR 1.2 models)

### Transform
- `Pipeline`, `flatten`, `normalize_sizes`, `normalize_timings`, `set_id`, `by_field`, `uuid`, `json_array_handler`

## Documentation

- [API Reference](docs/api.md)
- [Changelog](docs/changelog.md)
- [Contributing](CONTRIBUTING.md)
- [API Reference](https://github.com/pikulev/hario-core/blob/main/docs/api.md)
- [Changelog](https://github.com/pikulev/hario-core/blob/main/docs/changelog.md)
- [Contributing](https://github.com/pikulev/hario-core/blob/main/CONTRIBUTING.md)


## License

MIT License. See [LICENSE](LICENSE).
MIT License. See [LICENSE](https://github.com/pikulev/hario-core/blob/main/LICENSE).

## Supported Python Versions

Expand Down
111 changes: 111 additions & 0 deletions benchmarks/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from bench_core import (
STRATEGIES, HAR_PATH,
bench_flatten, bench_full, bench_normalize_sizes, bench_normalize_timings, bench_cpu_heavy,
create_results_table, create_results_csv, average_run, get_entries
)
from rich.console import Console
import argparse
import cProfile
import pstats
import sys



def main() -> None:
    """CLI entry point for the HAR pipeline microbenchmark.

    Runs the selected benchmark mode(s) against every pipeline strategy,
    optionally profiles a single strategy with cProfile, and can export
    the timing/memory results to CSV (file or stdout).
    """
    parser = argparse.ArgumentParser(
        description="""
    Microbenchmark for HAR Pipeline with different strategies, averaging, profiling and CSV output.

    Example usage:
    python bench.py flatten -f my.har --no-gc --csv results.csv
    python bench.py --csv all_results.csv
    python bench.py full --profile process
    """,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "mode",
        nargs="?",
        default="all",
        # "normalize" is kept as an alias that runs both normalize_* benchmarks;
        # the explicit normalize_sizes / normalize_timings choices match bench_map.
        choices=[
            "flatten", "normalize", "normalize_sizes", "normalize_timings",
            "full", "cpu_heavy", "all",
        ],
        help="Benchmark mode (default: all)"
    )
    parser.add_argument(
        "-f", "--file",
        default=HAR_PATH,
        help="Path to HAR file (default: benchmarks/test_lg.har)"
    )
    parser.add_argument(
        "--profile",
        choices=STRATEGIES,
        help="Enable cProfile profiling for the given strategy (e.g. --profile process)"
    )
    parser.add_argument(
        "--no-gc",
        action="store_true",
        help="Disable GC during measurement (default: GC enabled)"
    )
    parser.add_argument(
        "--csv",
        nargs="?",
        const="-",
        help="Save results to CSV file (or stdout if no file is specified)"
    )
    args = parser.parse_args()

    mode = args.mode
    har_path = args.file
    profile_strategy = args.profile
    use_gc = not args.no_gc

    console = Console()
    console.print(f"Loading HAR file: {har_path} ...")
    entries = get_entries(har_path)
    console.print(f"Loaded {len(entries)} entries.")

    bench_map = {
        "flatten": bench_flatten,
        "normalize_sizes": bench_normalize_sizes,
        "normalize_timings": bench_normalize_timings,
        "full": bench_full,
        "cpu_heavy": bench_cpu_heavy,
    }

    # Resolve the CLI mode to the concrete benchmark functions to run.
    if mode == "all":
        selected = dict(bench_map)
    elif mode == "normalize":
        # BUG FIX: "normalize" was accepted by argparse but had no bench_map
        # entry, so bench_map[mode] raised KeyError. Treat it as an alias
        # for both normalize benchmarks.
        selected = {
            name: fn for name, fn in bench_map.items()
            if name.startswith("normalize")
        }
    else:
        selected = {mode: bench_map[mode]}

    if profile_strategy:
        if len(selected) != 1:
            console.print("[red]Profiling is only available for a single benchmark mode.[/red]")
            sys.exit(1)
        (bench_name, bench_func), = selected.items()
        profile_file = f"benchmarks/{bench_name}.stats"
        console.print(f"Profiling... profile saved in {profile_file} (strategy: {profile_strategy})")

        def prof() -> None:
            # Wrapped so cProfile.runctx has a single call site to measure.
            bench_func(entries, profile_strategy, use_gc=use_gc)

        cProfile.runctx("prof()", globals(), locals(), profile_file)
        p = pstats.Stats(profile_file)
        console.print("\n=== TOP-20 functions by cumtime ===\n")
        p.strip_dirs().sort_stats("cumtime").print_stats(20)
        console.print(f"\n=== For visualization, run: snakeviz {profile_file} ===")
    else:
        results = {strategy: {} for strategy in STRATEGIES}
        # One unified loop for both "all" and single-mode runs (the two
        # previous branches were identical except for the dict iterated).
        for test_name, bench_func in selected.items():
            for strategy in STRATEGIES:
                console.print(f"\n[bold]Running {test_name} with {strategy} strategy...[/bold]")
                elapsed, current, peak, rss = average_run(bench_func, entries, strategy, use_gc=use_gc)
                results[strategy][test_name] = (elapsed, current, peak, rss)
        # Display results table
        table = create_results_table(results)
        console.print(table)
        # CSV output: bare "--csv" yields "-", meaning stdout (filename=None).
        if args.csv:
            filename = None if args.csv == "-" else args.csv
            # BUG FIX: the computed filename was previously ignored and a
            # garbled literal path was passed instead.
            create_results_csv(results, filename)

if __name__ == "__main__":
    main()
Loading