Skip to content

Commit a240987

Browse files
Added new benchmark
1 parent 17faba7 commit a240987

File tree

8 files changed

+132
-17
lines changed

8 files changed

+132
-17
lines changed

.github/workflows/benchmarks.yml

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,13 @@ jobs:
2929
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
3030
pip install -e .
3131
32-
- name: Run benchmark script
32+
- name: Run benchmark script Mouse.PacBio_1_perwidth
3333
working-directory: benchmarks
34-
run: bash run_Mouse.PacBio_1_perwidth.sh results-docs 120
34+
run: bash run_Mouse.PacBio_1_perwidth.sh results-docs 180
35+
36+
- name: Run benchmark script SRR020730
37+
working-directory: benchmarks
38+
run: bash run_SRR020730.sh results-docs 180
3539

3640
- name: Commit benchmark results
3741
run: |
@@ -40,7 +44,26 @@ jobs:
4044
git add benchmarks/results-docs/Mouse.PacBio_reads_1_perwidth.flow_corrected.grp.md
4145
git diff --staged --quiet || git commit -m "Update benchmark results [skip ci]"
4246
git push
43-
47+
48+
- name: Trigger documentation workflow
49+
uses: actions/github-script@v7
50+
with:
51+
script: |
52+
await github.rest.actions.createWorkflowDispatch({
53+
owner: context.repo.owner,
54+
repo: context.repo.repo,
55+
workflow_id: 'docs.yml',
56+
ref: 'main'
57+
})
58+
59+
- name: Commit benchmark results
60+
run: |
61+
git config --local user.email "github-actions[bot]@users.noreply.github.com"
62+
git config --local user.name "github-actions[bot]"
63+
git add benchmarks/results-docs/SRR020730.md
64+
git diff --staged --quiet || git commit -m "Update benchmark results [skip ci]"
65+
git push
66+
4467
- name: Trigger documentation workflow
4568
uses: actions/github-script@v7
4669
with:

benchmarks/BENCHMARK_VERSION

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
1.0.7
1+
1.0.8

benchmarks/aggregate_results.py

Lines changed: 49 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,32 @@ def _compute_speedup_stats(
181181

182182
return dict(speedups_by_interval)
183183

184+
def load_from_file(self, file_path: str) -> Tuple[str, str, List[BenchmarkResult]]:
185+
"""
186+
Load results from a specific JSON file.
187+
188+
Parameters
189+
----------
190+
file_path : str
191+
Path to the results JSON file
192+
193+
Returns
194+
-------
195+
tuple
196+
(model_name, dataset_name, list of BenchmarkResult)
197+
"""
198+
path = Path(file_path)
199+
if not path.exists():
200+
raise FileNotFoundError(f"Results file not found: {file_path}")
201+
202+
with open(path, 'r') as f:
203+
data = json.load(f)
204+
205+
model_name = data['model']
206+
dataset_name = data['dataset']
207+
results = [BenchmarkResult.from_dict(r) for r in data['results']]
208+
return model_name, dataset_name, results
209+
184210
def load_model_results(self, model_name: str) -> Dict[str, List[BenchmarkResult]]:
185211
"""
186212
Load all results for a specific model across all datasets.
@@ -755,7 +781,13 @@ def main():
755781

756782
parser.add_argument(
757783
'model',
758-
help='Model name (e.g., MinFlowDecomp)'
784+
nargs='?',
785+
help='Model name (e.g., MinFlowDecomp). Required unless --results-file is given.'
786+
)
787+
parser.add_argument(
788+
'--results-file',
789+
help='Path to a specific results JSON file. When provided, a table is generated '
790+
'from that file only and --results-dir / model are ignored.'
759791
)
760792
parser.add_argument(
761793
'--results-dir',
@@ -789,10 +821,20 @@ def main():
789821

790822
# Load results
791823
aggregator = ResultsAggregator(args.results_dir)
792-
results_by_dataset = aggregator.load_model_results(args.model)
824+
825+
if args.results_file:
826+
# Load from a specific JSON file
827+
model_name, dataset_name, results = aggregator.load_from_file(args.results_file)
828+
results_by_dataset = {dataset_name: results}
829+
effective_model = model_name
830+
else:
831+
if not args.model:
832+
parser.error("model is required when --results-file is not provided")
833+
results_by_dataset = aggregator.load_model_results(args.model)
834+
effective_model = args.model
793835

794836
if not results_by_dataset:
795-
print(f"No results found for model: {args.model}")
837+
print(f"No results found")
796838
return
797839

798840
# Generate output
@@ -801,17 +843,17 @@ def main():
801843
for dataset_name, results in sorted(results_by_dataset.items()):
802844
if args.format == 'markdown':
803845
table = aggregator.generate_markdown_table(
804-
args.model, dataset_name, results, args.interval_size, args.metric
846+
effective_model, dataset_name, results, args.interval_size, args.metric
805847
)
806848
output_lines.append(table)
807849
elif args.format == 'latex':
808850
table = aggregator.generate_latex_table(
809-
args.model, dataset_name, results, args.interval_size, args.metric
851+
effective_model, dataset_name, results, args.interval_size, args.metric
810852
)
811853
output_lines.append(table)
812854
else: # console
813855
aggregator.print_console_table(
814-
args.model, dataset_name, results, args.interval_size, args.metric
856+
effective_model, dataset_name, results, args.interval_size, args.metric
815857
)
816858

817859
# Write to file if specified, or default to results directory
@@ -823,7 +865,7 @@ def main():
823865
output_dir = Path(args.results_dir)
824866
output_dir.mkdir(exist_ok=True)
825867
ext = 'md' if args.format == 'markdown' else 'tex'
826-
output_path = output_dir / f"{args.model}.{ext}"
868+
output_path = output_dir / f"{effective_model}.{ext}"
827869

828870
if output_lines:
829871
# Make overwrite behavior explicit for generated markdown/latex exports.
Binary file not shown.

benchmarks/run_Mouse.PacBio_1_perwidth.sh

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,22 +26,23 @@ echo ""
2626
python benchmark_minflowdecomp.py \
2727
--datasets datasets/esa2025/Mouse.PacBio_reads_1_perwidth.flow_corrected.grp.gz \
2828
--min-width 1 \
29-
--max-width 9 \
29+
--max-width 6 \
3030
--time-limit "$TIME_LIMIT"
3131

3232
echo ""
3333
echo "Step 2: Viewing results in console"
3434
echo ""
3535

3636
# Display results in console
37-
python aggregate_results.py MinFlowDecomp
37+
python aggregate_results.py --results-file "results/MinFlowDecomp_Mouse.PacBio_reads_1_perwidth.flow_corrected.json"
3838

3939
echo ""
4040
echo "Step 3: Generating markdown table"
4141
echo ""
4242

4343
# Generate markdown table
44-
python aggregate_results.py MinFlowDecomp \
44+
python aggregate_results.py \
45+
--results-file "results/MinFlowDecomp_Mouse.PacBio_reads_1_perwidth.flow_corrected.json" \
4546
--format markdown \
4647
--output "$OUTPUT_DIR/Mouse.PacBio_reads_1_perwidth.flow_corrected.grp.md" \
4748
--metric mean

benchmarks/run_Mouse.PacBio_5_perwidth.sh

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,14 +28,15 @@ echo "Step 2: Viewing results in console"
2828
echo ""
2929

3030
# Display results in console
31-
python aggregate_results.py MinFlowDecomp
31+
python aggregate_results.py --results-file "results/MinFlowDecomp_Mouse.PacBio_reads_5_perwidth.flow_corrected.json"
3232

3333
echo ""
3434
echo "Step 3: Generating markdown table"
3535
echo ""
3636

3737
# Generate markdown table
38-
python aggregate_results.py MinFlowDecomp \
38+
python aggregate_results.py \
39+
--results-file "results/MinFlowDecomp_Mouse.PacBio_reads_5_perwidth.flow_corrected.json" \
3940
--format markdown \
4041
--output results/Mouse.PacBio_reads_5_perwidth.flow_corrected.grp.md \
4142
--metric mean

benchmarks/run_SRR020730.sh

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
2+
3+
# Example script showing how to run the benchmark suite
4+
# This demonstrates a complete workflow from running benchmarks to generating tables
5+
6+
echo "=================================="
7+
echo "Flowpaths Benchmark Suite Example"
8+
echo "=================================="
9+
echo ""
10+
11+
# Navigate to benchmarks directory
12+
cd "$(dirname "$0")"
13+
14+
# Get output directory from parameter or use default
15+
OUTPUT_DIR="${1:-results}"
16+
# Get benchmark time limit (seconds) from parameter or use default
17+
TIME_LIMIT="${2:-300}"
18+
19+
# Create results directory if it doesn't exist
20+
mkdir -p "$OUTPUT_DIR"
21+
22+
echo "Step 1: Running MinFlowDecomp benchmark on small dataset"
23+
echo ""
24+
25+
# Run benchmark on the small dataset
26+
python benchmark_minflowdecomp.py \
27+
--datasets datasets/catfish-data/rnaseq/salmon/sparse_quant_SRR020730.graph.gz \
28+
--time-limit "$TIME_LIMIT"
29+
30+
echo ""
31+
echo "Step 2: Viewing results in console"
32+
echo ""
33+
34+
# Display results in console
35+
python aggregate_results.py --results-file "results/MinFlowDecomp_sparse_quant_SRR020730.json"
36+
37+
echo ""
38+
echo "Step 3: Generating markdown table"
39+
echo ""
40+
41+
# Generate markdown table
42+
python aggregate_results.py \
43+
--results-file "results/MinFlowDecomp_sparse_quant_SRR020730.json" \
44+
--format markdown \
45+
--output "$OUTPUT_DIR/sparse_quant_SRR020730.md" \
46+
--metric mean

docs/benchmarks.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,4 +7,6 @@ hide:
77

88
This page shows the latest benchmark results for the MinFlowDecomp solver on small test datasets.
99

10-
--8<-- "benchmarks/results-docs/Mouse.PacBio_reads_1_perwidth.flow_corrected.grp.md"
10+
--8<-- "benchmarks/results-docs/Mouse.PacBio_reads_1_perwidth.flow_corrected.grp.md"
11+
12+
--8<-- "benchmarks/results-docs/SRR020730.md"

0 commit comments

Comments (0)