19 changes: 19 additions & 0 deletions README.md
@@ -28,6 +28,8 @@ We have sorted the resources into five separate folders.
- 🔧 [src/extracting_context/](src/extracting_context/) - Extract contexts from data.
- 🔧 [src/question_answer/](src/question_answer/) - Generate questions and answers.
- 🔧 [src/sampling/](src/sampling/) - Sample representative questions and answers.
- 🔧 [src/labeling_llm/](src/labeling_llm/) - Label the samples using LLMs.


```sh
# Set up `uv` on your machine
@@ -54,6 +56,23 @@ $ uv run -m src.sampling.run
$ uv run -m src.labeling_llm.run
```

---
## Performing Experiments

This section describes how to run experiments on the QASports dataset, covering the document retriever and document reader tasks; an example invocation follows the setup commands below.

```sh
# Set up `uv` on your machine
# https://github.com/astral-sh/uv
# Install the project dependencies
$ uv sync

# 1. Document Retriever Experiments
$ uv run -m experiments.doc_retriever --help
# 2. Document Reader Experiments
$ uv run -m experiments.doc_reader --help
```
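
An example invocation of the reader script, using the flag names defined in `experiments/doc_reader.py` (values must match the `Dataset`, `DocReader`, and `Sports` enum names; use `--help` to list the available choices):

```sh
# Example: evaluate the BERT reader on the QASports skiing subset
$ uv run -m experiments.doc_reader --dataset QASports --model BERT --sport SKIING
```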

---
## Citation

Empty file added experiments/README.md
121 changes: 121 additions & 0 deletions experiments/doc_reader.py
@@ -0,0 +1,121 @@
"""Document Reader Experiments"""

import argparse
from haystack import Pipeline
from haystack.nodes import FARMReader
# from haystack.utils import print_answers

from .module import (
    Dataset,
    DocReader,
    Sports,
    SQuadDataset,
    AdversarialQADataset,
    DuoRCDataset,
    QASportsDataset,
)


# Model setup
# DATASET = Dataset.QASports
# DOC_READER = DocReader.BERT
# SPORT = Sports.SKIING
parser = argparse.ArgumentParser(description="Run document reader experiments.")
parser.add_argument(
"--dataset",
type=str,
default="QASports",
choices=[d.name for d in Dataset],
help="Dataset to use for the experiment.",
)
parser.add_argument(
"--model",
type=str,
default="BERT",
choices=[attr.name for attr in DocReader],
help="Document reader model to use.",
)
parser.add_argument(
"--sport",
type=str,
default="ALL",
choices=[attr.name for attr in Sports],
help="Sport to filter for QASports dataset.",
)

args = parser.parse_args()

DATASET = Dataset[args.dataset]
DOC_READER = DocReader[args.model].value
SPORT = Sports[args.sport].value
print(f"Dataset: {DATASET} // Sport: {SPORT}")
print(f"Model: {DOC_READER}")


# Download the dataset
def dataset_switch(choice):
    """Return the dataset loader matching the selected Dataset enum member."""

    if choice == Dataset.SQuAD:
        return SQuadDataset()
    elif choice == Dataset.AdvQA:
        return AdversarialQADataset()
    elif choice == Dataset.DuoRC:
        return DuoRCDataset()
    elif choice == Dataset.QASports:
        return QASportsDataset(SPORT)
    else:
        raise ValueError(f"Invalid dataset: {choice}")


# Get the dataset
dataset = dataset_switch(DATASET)
docs = dataset.get_documents()
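# `docs` is expected to be a list of haystack Document objects built from the
# dataset contexts; they are passed to the pipeline explicitly at query/eval time.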

"""---
## Document Reader

In this experiment, we explore three Transformer-based models for extractive Question Answering using the [FARM framework](https://github.com/deepset-ai/FARM).
* [BERT paper](https://arxiv.org/abs/1810.04805), [implementation](https://huggingface.co/deepset/bert-base-uncased-squad2)
* [RoBERTa paper](https://arxiv.org/abs/1907.11692), [implementation](https://huggingface.co/deepset/roberta-base-squad2)
* [MiniLM paper](https://arxiv.org/abs/2002.10957), [implementation](https://huggingface.co/deepset/minilm-uncased-squad2)

"""

# Get the reader
reader = FARMReader(DOC_READER, use_gpu=True)

# Build the pipeline
pipe = Pipeline()
pipe.add_node(component=reader, name="Reader", inputs=["Query"])
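# Note: this pipeline has only a Reader node (no retriever); candidate
# documents are supplied explicitly via the `documents` argument below.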

# # Querying documents
# question = "Who did the Raptors face in the first round of the 2015 Playoffs?"
# prediction = pipe.run(
# query=question, documents=docs[0:10], params={"Reader": {"top_k": 3}}
# )

# # Print answer
# print_answers(prediction)

"""---
## Evaluation

For details on the metrics, see the Haystack [evaluation](https://docs.haystack.deepset.ai/docs/evaluation) documentation.
"""

# Commented out IPython magic to ensure Python compatibility.
# %%time
#
# The full validation set is evaluated below; for a quick test, slice it first,
# e.g. `eval_labels = dataset.get_validation()[0:100]`.
eval_labels = dataset.get_validation()
eval_docs = [
[label.document for label in multi_label.labels] for multi_label in eval_labels
]

eval_result = pipe.eval(
labels=eval_labels, documents=eval_docs, params={"Reader": {"top_k": 1}}
)

# Get and print the metrics
metrics = eval_result.calculate_metrics()
print(metrics)
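
# The metrics dict is keyed by pipeline node name; for the reader it typically
# includes exact-match and F1 scores (exact key names depend on the installed
# Haystack version).
# Optionally, print a more detailed per-node report (Haystack 1.x pipelines):
# pipe.print_eval_report(eval_result)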