diff --git a/src/reporting/filter_results/config.vsh.yaml b/src/reporting/filter_results/config.vsh.yaml
new file mode 100644
index 000000000..09e22ba7a
--- /dev/null
+++ b/src/reporting/filter_results/config.vsh.yaml
@@ -0,0 +1,162 @@
+name: filter_results
+namespace: reporting
+description: Filter dataset, method, and metric info, as well as results, based on include/exclude criteria
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: --input_dataset_info
+        type: file
+        description: JSON file containing dataset information
+        required: true
+        example: resources_test/openproblems/task_results_v4/processed/dataset_info.json
+
+      - name: --input_method_info
+        type: file
+        description: JSON file containing method information
+        required: true
+        example: resources_test/openproblems/task_results_v4/processed/method_info.json
+
+      - name: --input_metric_info
+        type: file
+        description: JSON file containing metric information
+        required: true
+        example: resources_test/openproblems/task_results_v4/processed/metric_info.json
+
+      - name: --input_results
+        type: file
+        description: JSON file containing results
+        required: true
+        example: resources_test/openproblems/task_results_v4/processed/results.json
+
+  - name: Dataset filtering
+    description: |
+      Use these arguments to filter datasets by name. By default, all datasets are
+      included. If `--datasets_include` is defined, only those datasets are included.
+      If `--datasets_exclude` is defined, all datasets except those specified are
+      included. These arguments are mutually exclusive, so only `--datasets_include`
+      OR `--datasets_exclude` can be set, but not both.
+    arguments:
+      - name: "--datasets_include"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to include. If specified, only these datasets will be included.
+      - name: "--datasets_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to exclude. If specified, all datasets except the ones listed will be included.
+
+  - name: Method filtering
+    description: |
+      Use these arguments to filter methods by name. By default, all methods are
+      included. If `--methods_include` is defined, only those methods are included.
+      If `--methods_exclude` is defined, all methods except those specified are
+      included. These arguments are mutually exclusive, so only `--methods_include`
+      OR `--methods_exclude` can be set, but not both.
+    arguments:
+      - name: "--methods_include"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to include. If specified, only these methods will be included.
+      - name: "--methods_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to exclude. If specified, all methods except the ones listed will be included.
+
+  - name: Metric filtering
+    description: |
+      Use these arguments to filter metrics by name. By default, all metrics are
+      included. If `--metrics_include` is defined, only those metrics are included.
+      If `--metrics_exclude` is defined, all metrics except those specified are
+      included. These arguments are mutually exclusive, so only `--metrics_include`
+      OR `--metrics_exclude` can be set, but not both.
+    arguments:
+      - name: "--metrics_include"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to include. If specified, only these metrics will be included.
+      - name: "--metrics_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to exclude. If specified, all metrics except the ones listed will be included.
+
+  - name: Outputs
+    arguments:
+      - name: --output_dataset_info
+        type: file
+        direction: output
+        default: filtered_dataset_info.json
+        description: Filtered dataset info JSON file
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/dataset_info.json
+        example: resources_test/openproblems/task_results_v4/processed/filtered_dataset_info.json
+
+      - name: --output_method_info
+        type: file
+        direction: output
+        default: filtered_method_info.json
+        description: Filtered method info JSON file
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/method_info.json
+        example: resources_test/openproblems/task_results_v4/processed/filtered_method_info.json
+
+      - name: --output_metric_info
+        type: file
+        direction: output
+        default: filtered_metric_info.json
+        description: Filtered metric info JSON file
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/metric_info.json
+        example: resources_test/openproblems/task_results_v4/processed/filtered_metric_info.json
+
+      - name: --output_results
+        type: file
+        direction: output
+        default: filtered_results.json
+        description: Filtered results JSON file
+        info:
+          format:
+            type: json
+            schema: /common/schemas/results_v4/results.json
+        example: resources_test/openproblems/task_results_v4/processed/filtered_results.json
+
+resources:
+  - type: python_script
+    path: script.py
+  - path: /common/schemas
+    dest: schemas
+
+test_resources:
+  - type: python_script
+    path: /common/component_tests/run_and_check_output.py
+  - path: /resources_test/openproblems/task_results_v4
+    dest: resources_test/openproblems/task_results_v4
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1
+    setup:
+      - type: apt
+        packages:
+          - nodejs
+          - npm
+      - type: docker
+        run: npm install -g ajv-cli
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [lowmem, lowtime, lowcpu]
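Usage sketch (hypothetical paths; assumes the component has been built into an
executable named `filter_results`):

  filter_results \
    --input_dataset_info dataset_info.json \
    --input_method_info method_info.json \
    --input_metric_info metric_info.json \
    --input_results results.json \
    --datasets_exclude cellxgene_census/tabula_sapiens \
    --output_results filtered_results.json

Outputs that are not specified fall back to the defaults declared above
(e.g. `filtered_dataset_info.json`).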
diff --git a/src/reporting/filter_results/script.py b/src/reporting/filter_results/script.py
new file mode 100644
index 000000000..952e5fab7
--- /dev/null
+++ b/src/reporting/filter_results/script.py
@@ -0,0 +1,311 @@
+## VIASH START
+par = {
+    "input_dataset_info": "resources_test/openproblems/task_results_v4/processed/dataset_info.json",
+    "input_method_info": "resources_test/openproblems/task_results_v4/processed/method_info.json",
+    "input_metric_info": "resources_test/openproblems/task_results_v4/processed/metric_info.json",
+    "input_results": "resources_test/openproblems/task_results_v4/processed/results.json",
+    "output_dataset_info": "resources_test/openproblems/task_results_v4/processed/filtered_dataset_info.json",
+    "output_method_info": "resources_test/openproblems/task_results_v4/processed/filtered_method_info.json",
+    "output_metric_info": "resources_test/openproblems/task_results_v4/processed/filtered_metric_info.json",
+    "output_results": "resources_test/openproblems/task_results_v4/processed/filtered_results.json",
+    "datasets_exclude": [
+        "cellxgene_census/tabula_sapiens",
+        "cellxgene_census/mouse_pancreas_atlas",
+    ],
+    "datasets_include": None,
+    "methods_exclude": None,
+    "methods_include": None,
+    "metrics_exclude": None,
+    "metrics_include": None,
+}
+meta = {"resources_dir": "target/executable/reporting/filter_results"}
+## VIASH END
+
+import json
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+
+
+def validate_filtering_args():
+    """Validate that include/exclude arguments are mutually exclusive."""
+    if par["datasets_include"] and par["datasets_exclude"]:
+        raise ValueError(
+            "Cannot specify both --datasets_include and --datasets_exclude"
+        )
+
+    if par["methods_include"] and par["methods_exclude"]:
+        raise ValueError("Cannot specify both --methods_include and --methods_exclude")
+
+    if par["metrics_include"] and par["metrics_exclude"]:
+        raise ValueError("Cannot specify both --metrics_include and --metrics_exclude")
+
+
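+# Illustration (hypothetical data): with data_list = [{"name": "a"}, {"name": "b"}],
+# include_list=["a"] yields [{"name": "a"}], while exclude_list=["a"] yields
+# [{"name": "b"}]; filtering always matches on each item's "name" field.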
+def apply_name_filter(
+    data_list: List[Dict[str, Any]],
+    include_list: Optional[List[str]] = None,
+    exclude_list: Optional[List[str]] = None,
+    item_type: str = "item",
+) -> List[Dict[str, Any]]:
+    """Apply filtering to a list based on the "name" field."""
+    if not data_list:
+        return data_list
+
+    original_count = len(data_list)
+    item_names = [item["name"] for item in data_list]
+
+    if include_list:
+        items_to_include = set(item_names) & set(include_list)
+        if not items_to_include:
+            print(
+                f"Warning: None of the specified {item_type}s to include were found in the data",
+                file=sys.stderr,
+            )
+            return []
+
+        missing_items = set(include_list) - set(item_names)
+        if missing_items:
+            print(
+                f"Warning: The following {item_type}s specified in include list were not found: "
+                + ", ".join(missing_items),
+                file=sys.stderr,
+            )
+
+        filtered_data = [item for item in data_list if item["name"] in items_to_include]
+        print(f"Included {len(filtered_data)} out of {original_count} {item_type}s")
+        return filtered_data
+
+    elif exclude_list:
+        items_to_exclude = set(item_names) & set(exclude_list)
+
+        missing_items = set(exclude_list) - set(item_names)
+        if missing_items:
+            print(
+                f"Warning: The following {item_type}s specified in exclude list were not found: "
+                + ", ".join(missing_items),
+                file=sys.stderr,
+            )
+
+        filtered_data = [
+            item for item in data_list if item["name"] not in items_to_exclude
+        ]
+        print(
+            f"Excluded {len(items_to_exclude)} {item_type}s, keeping {len(filtered_data)} out of {original_count} {item_type}s"
+        )
+        return filtered_data
+
+    # No filtering applied
+    return data_list
+
+
+def filter_results_data(
+    results_data: List[Dict[str, Any]],
+    dataset_names: List[str],
+    method_names: List[str],
+    metric_names: List[str],
+) -> List[Dict[str, Any]]:
+    """Filter results based on dataset, method, and metric filters."""
+    if not results_data:
+        return results_data
+
+    original_count = len(results_data)
+
+    # Filter result entries based on dataset_name, method_name, and metric_names
+    filtered_results = []
+    for result in results_data:
+        dataset_keep = result["dataset_name"] in dataset_names
+        method_keep = result["method_name"] in method_names
+
+        # Check whether this result should be kept
+        if dataset_keep and method_keep:
+            filtered_result = result.copy()
+
+            filtered_metrics = [
+                (i, name)
+                for i, name in enumerate(result["metric_names"])
+                if name in metric_names
+            ]
+
+            # store metric names
+            filtered_result["metric_names"] = [name for _, name in filtered_metrics]
+
+            # store metric values
+            filtered_result["metric_values"] = [
+                result["metric_values"][i] for i, _ in filtered_metrics
+            ]
+
+            # store metric components
+            new_metric_components = []
+            for component in result.get("metric_components", []):
+                new_component = component.copy()
+                new_component["metric_names"] = [
+                    name for name in component["metric_names"] if name in metric_names
+                ]
+
+                # keep the component only if any of its metrics survived the filter
+                if new_component["metric_names"]:
+                    new_metric_components.append(new_component)
+            filtered_result["metric_components"] = new_metric_components
+
+            filtered_results.append(filtered_result)
+
+    print(
+        f"Filtered results: keeping {len(filtered_results)} out of {original_count} result entries"
+    )
+    return filtered_results
+
+
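+# Illustration (hypothetical entry): {"dataset_name": "d1", "method_name": "m1",
+# "metric_names": ["auc", "f1"], "metric_values": [0.9, 0.8]} filtered with
+# metric_names=["auc"] keeps the parallel arrays in sync, yielding
+# metric_names == ["auc"] and metric_values == [0.9]; metric_components, if
+# present, are pruned the same way.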
+def validate_json_against_schema(
+    json_file: str, schema_file: str, name: str
+) -> tuple[bool, str]:
+    """Validate a JSON file against its schema using ajv-cli.
+
+    Returns:
+        tuple[bool, str]: (is_valid, error_message)
+    """
+    try:
+        # Builds a command roughly equivalent to:
+        #   ajv validate --spec draft2020 -s <schema> -r schemas/results_v4/core.json -d <json_file>
+        cmd = [
+            "ajv",
+            "validate",
+            "--spec",
+            "draft2020",
+            "-s",
+            schema_file,
+            "-r",
+            str(Path(meta["resources_dir"]) / "schemas" / "results_v4" / "core.json"),
+            "-d",
+            json_file,
+        ]
+
+        result = subprocess.run(cmd, capture_output=True, text=True)
+
+        if result.returncode == 0:
+            print(f"✓ {name} validation passed")
+            return True, ""
+        else:
+            error_msg = ""
+            if result.stderr:
+                error_msg += f"stderr: {result.stderr.strip()}"
+            if result.stdout:
+                error_msg += f"\nstdout: {result.stdout.strip()}"
+            if not error_msg:
+                error_msg = "Unknown validation error"
+
+            return False, error_msg
+
+    except FileNotFoundError:
+        return False, "ajv-cli not found. Cannot validate schema"
+
+
+print("====== Filter results ======")
+
+# Validation
+print("\n>>> Validating arguments...")
+validate_filtering_args()
+
+# Read input files
+print("\n>>> Reading input files...")
+
+print(f'Reading dataset info from "{par["input_dataset_info"]}"...')
+with open(par["input_dataset_info"], "r") as f:
+    dataset_info = json.load(f)
+
+print(f'Reading method info from "{par["input_method_info"]}"...')
+with open(par["input_method_info"], "r") as f:
+    method_info = json.load(f)
+
+print(f'Reading metric info from "{par["input_metric_info"]}"...')
+with open(par["input_metric_info"], "r") as f:
+    metric_info = json.load(f)
+
+print(f'Reading results from "{par["input_results"]}"...')
+with open(par["input_results"], "r") as f:
+    results = json.load(f)
+
+# Apply filters
+print("\n>>> Applying filters...")
+
+print("Filtering datasets...")
+filtered_dataset_info = apply_name_filter(
+    dataset_info, par["datasets_include"], par["datasets_exclude"], "dataset"
+)
+
+print("Filtering methods...")
+filtered_method_info = apply_name_filter(
+    method_info, par["methods_include"], par["methods_exclude"], "method"
+)
+
+print("Filtering metrics...")
+filtered_metric_info = apply_name_filter(
+    metric_info, par["metrics_include"], par["metrics_exclude"], "metric"
+)
+
+# Get names for results filtering
+filtered_dataset_names = [item["name"] for item in filtered_dataset_info]
+filtered_method_names = [item["name"] for item in filtered_method_info]
+filtered_metric_names = [item["name"] for item in filtered_metric_info]
+
+print("Filtering results...")
+filtered_results = filter_results_data(
+    results, filtered_dataset_names, filtered_method_names, filtered_metric_names
+)
+
+# Write and validate output files
+print("\n>>> Writing and validating output files...")
+results_schemas_dir = Path(meta["resources_dir"]) / "schemas" / "results_v4"
+
+validation_files = [
+    {
+        "data": filtered_dataset_info,
+        "schema": "dataset_info.json",
+        "file": par["output_dataset_info"],
+        "name": "dataset info",
+    },
+    {
+        "data": filtered_method_info,
+        "schema": "method_info.json",
+        "file": par["output_method_info"],
+        "name": "method info",
+    },
+    {
+        "data": filtered_metric_info,
+        "schema": "metric_info.json",
+        "file": par["output_metric_info"],
+        "name": "metric info",
+    },
+    {
+        "data": filtered_results,
+        "schema": "results.json",
+        "file": par["output_results"],
+        "name": "results",
+    },
+]
+
+# Write each filtered output and validate it against its schema; failures are
+# collected so every file is checked before an error is raised.
+all_valid = True
+for validation in validation_files:
+    print(f'Writing {validation["name"]} to "{validation["file"]}"...')
+    with open(validation["file"], "w") as f:
+        json.dump(validation["data"], f, indent=2, ensure_ascii=False)
+
+    print(f'Validating {validation["name"]}...')
+    schema_file = str(results_schemas_dir / validation["schema"])
+    is_valid, error_msg = validate_json_against_schema(
+        validation["file"], schema_file, validation["name"]
+    )
+    if not is_valid:
+        print(f'✗ {validation["name"]} validation failed')
+        print(f"Validation error: {error_msg}")
+        all_valid = False
+
+if not all_valid:
+    raise RuntimeError("One or more output files do not conform to their schemas")
+
+# Summary
+print("\n>>> Summary of filtering results:")
+print(f"Datasets: {len(filtered_dataset_info)} (from {len(dataset_info)})")
+print(f"Methods: {len(filtered_method_info)} (from {len(method_info)})")
+print(f"Metrics: {len(filtered_metric_info)} (from {len(metric_info)})")
+print(f"Results: {len(filtered_results)} (from {len(results)})")
+
+print("\n>>> Done!")
diff --git a/src/reporting/process_task_results/config.vsh.yaml b/src/reporting/process_task_results/config.vsh.yaml
index e1703bf52..c3dcd3699 100644
--- a/src/reporting/process_task_results/config.vsh.yaml
+++ b/src/reporting/process_task_results/config.vsh.yaml
@@ -44,6 +44,63 @@ argument_groups:
       description: Nextflow execution trace file
       example: resources_test/openproblems/task_results_v4/raw/trace.txt

+  - name: Dataset filtering
+    description: |
+      Use these arguments to filter datasets by name. By default, all datasets are
+      included. If `--datasets_include` is defined, only those datasets are included.
+      If `--datasets_exclude` is defined, all datasets except those specified are
+      included. These arguments are mutually exclusive, so only `--datasets_include`
+      OR `--datasets_exclude` can be set, but not both.
+    arguments:
+      - name: "--datasets_include"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to include. If specified, only these datasets will be included.
+      - name: "--datasets_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of dataset ids to exclude. If specified, all datasets except the ones listed will be included.
+
+  - name: Method filtering
+    description: |
+      Use these arguments to filter methods by name. By default, all methods are
+      included. If `--methods_include` is defined, only those methods are included.
+      If `--methods_exclude` is defined, all methods except those specified are
+      included. These arguments are mutually exclusive, so only `--methods_include`
+      OR `--methods_exclude` can be set, but not both.
+    arguments:
+      - name: "--methods_include"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to include. If specified, only these methods will be included.
+      - name: "--methods_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of method ids to exclude. If specified, all methods except the ones listed will be included.
+
+  - name: Metric filtering
+    description: |
+      Use these arguments to filter metrics by name. By default, all metrics are
+      included. If `--metrics_include` is defined, only those metrics are included.
+      If `--metrics_exclude` is defined, all metrics except those specified are
+      included. These arguments are mutually exclusive, so only `--metrics_include`
+      OR `--metrics_exclude` can be set, but not both.
+    arguments:
+      - name: "--metrics_include"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to include. If specified, only these metrics will be included.
+      - name: "--metrics_exclude"
+        type: string
+        multiple: true
+        description: |
+          A list of metric ids to exclude. If specified, all metrics except the ones listed will be included.
+
   - name: Outputs
     arguments:
       - name: "--output_combined"
@@ -137,6 +194,7 @@ dependencies:
   - name: reporting/get_metric_info
   - name: reporting/get_dataset_info
   - name: reporting/get_task_info
+  - name: reporting/filter_results
   - name: reporting/generate_qc
   - name: reporting/combine_output
   - name: reporting/render_report
diff --git a/src/reporting/process_task_results/main.nf b/src/reporting/process_task_results/main.nf
index 1fc64f389..059960d65 100644
--- a/src/reporting/process_task_results/main.nf
+++ b/src/reporting/process_task_results/main.nf
@@ -59,6 +59,32 @@ workflow run_wf {
       ]
     )

+    | filter_results.run(
+      runIf: { id, state ->
+        // Only run filtering if there are include/exclude lists defined
+        return state.datasets_exclude || state.methods_exclude || state.metrics_exclude ||
+          state.datasets_include || state.methods_include || state.metrics_include
+      },
+      fromState: [
+        "input_dataset_info": "output_dataset",
+        "input_method_info": "output_method",
+        "input_metric_info": "output_metric",
+        "input_results": "output_results",
+        "datasets_include": "datasets_include",
+        "datasets_exclude": "datasets_exclude",
+        "methods_include": "methods_include",
+        "methods_exclude": "methods_exclude",
+        "metrics_include": "metrics_include",
+        "metrics_exclude": "metrics_exclude"
+      ],
+      toState: [
+        "output_dataset": "output_dataset_info",
+        "output_method": "output_method_info",
+        "output_metric": "output_metric_info",
+        "output_results": "output_results"
+      ]
+    )
+
     | generate_qc.run(
       fromState: [
         "input_task_info": "output_task",