fix: llamaIndex import fixes (#156)

jjmachan · web-flow · commit 7a128466b9c6 · 2023-09-26T15:30:43.000+05:30
fixes: #144. Changes: - fix the incorrect llamaIndex import - specify the missing dependencies - update the outdated metrics documentation
diff --git a/docs/integrations/llamaindex.ipynb b/docs/integrations/llamaindex.ipynb
@@ -125,7 +125,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "id": "751dc988",
    "metadata": {},
    "outputs": [],
@@ -159,7 +159,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
    "id": "9875132a",
    "metadata": {},
    "outputs": [],
@@ -191,7 +191,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 8,
    "id": "05633cc2",
    "metadata": {},
    "outputs": [
@@ -206,7 +206,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████| 1/1 [01:12<00:00, 72.16s/it]\n"
+      "100%|█████████████████████████████████████████████████████████████| 1/1 [01:00<00:00, 60.01s/it]\n"
      ]
     },
     {
@@ -220,21 +220,21 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:12<00:00, 12.74s/it]\n"
+      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:11<00:00, 11.67s/it]\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "evaluating with [context_ relevancy]\n"
+      "evaluating with [context_relevancy]\n"
      ]
     },
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:39<00:00, 39.72s/it]\n"
+      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:46<00:00, 46.11s/it]\n"
      ]
     },
     {
@@ -248,7 +248,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:20<00:00, 20.26s/it]\n"
+      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.82s/it]\n"
      ]
     },
     {
@@ -262,7 +262,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:31<00:00, 31.83s/it]\n"
+      "100%|█████████████████████████████████████████████████████████████| 1/1 [00:39<00:00, 39.68s/it]\n"
      ]
     }
    ],
@@ -274,15 +274,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
    "id": "f927a943",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'ragas_score': 0.4150, 'faithfulness': 0.7000, 'answer_relevancy': 0.9550, 'context_ relevancy': 0.1622, 'harmfulness': 0.0000, 'context_recall': 1.0000}\n"
+      "{'ragas_score': 0.5228, 'faithfulness': 0.7000, 'answer_relevancy': 0.9565, 'context_relevancy': 0.2406, 'harmfulness': 0.0000, 'context_recall': 0.9800}\n"
      ]
     }
    ],
@@ -301,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
    "id": "b96311e2",
    "metadata": {},
    "outputs": [
@@ -327,12 +327,12 @@
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>question</th>\n",
-       "      <th>answer</th>\n",
        "      <th>contexts</th>\n",
+       "      <th>answer</th>\n",
        "      <th>ground_truths</th>\n",
        "      <th>faithfulness</th>\n",
        "      <th>answer_relevancy</th>\n",
-       "      <th>context_ relevancy</th>\n",
+       "      <th>context_relevancy</th>\n",
        "      <th>harmfulness</th>\n",
        "      <th>context_recall</th>\n",
        "    </tr>\n",
@@ -341,60 +341,60 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>What is the population of New York City as of ...</td>\n",
-       "      <td>\\nThe population of New York City as of 2020 i...</td>\n",
        "      <td>[Aeromedical Staging Squadron, and a military ...</td>\n",
+       "      <td>\\nThe population of New York City as of 2020 i...</td>\n",
        "      <td>[8,804,000]</td>\n",
        "      <td>1.0</td>\n",
-       "      <td>0.999999</td>\n",
-       "      <td>0.161345</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>0.320000</td>\n",
        "      <td>0</td>\n",
        "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>Which borough of New York City has the highest...</td>\n",
-       "      <td>\\nThe borough of Manhattan has the highest pop...</td>\n",
        "      <td>[co-extensive with New York County, the boroug...</td>\n",
+       "      <td>\\nThe borough of Manhattan has the highest pop...</td>\n",
        "      <td>[Queens]</td>\n",
        "      <td>0.0</td>\n",
-       "      <td>0.998528</td>\n",
-       "      <td>0.046342</td>\n",
+       "      <td>0.998525</td>\n",
+       "      <td>0.038462</td>\n",
        "      <td>0</td>\n",
-       "      <td>1.0</td>\n",
+       "      <td>0.9</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>What is the economic significance of New York ...</td>\n",
-       "      <td>\\nNew York City is a major global economic cen...</td>\n",
        "      <td>[health care and life sciences, medical techno...</td>\n",
+       "      <td>\\nNew York City is a major global economic cen...</td>\n",
        "      <td>[New York City's economic significance is vast...</td>\n",
        "      <td>1.0</td>\n",
-       "      <td>0.903937</td>\n",
-       "      <td>0.407880</td>\n",
+       "      <td>0.911303</td>\n",
+       "      <td>0.384615</td>\n",
        "      <td>0</td>\n",
        "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>How did New York City get its name?</td>\n",
-       "      <td>\\nNew York City was named in honor of the Duke...</td>\n",
        "      <td>[a US$1 billion research and education center ...</td>\n",
+       "      <td>\\nNew York City was named in honor of the Duke...</td>\n",
        "      <td>[New York City got its name when it came under...</td>\n",
        "      <td>1.0</td>\n",
-       "      <td>0.929809</td>\n",
-       "      <td>0.057195</td>\n",
+       "      <td>0.929792</td>\n",
+       "      <td>0.407407</td>\n",
        "      <td>0</td>\n",
        "      <td>1.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>What is the significance of the Statue of Libe...</td>\n",
-       "      <td>\\nThe Statue of Liberty is a symbol of the Uni...</td>\n",
        "      <td>[(stylized I ❤ NY) is both a logo and a song t...</td>\n",
+       "      <td>\\nThe Statue of Liberty is a symbol of the Uni...</td>\n",
        "      <td>[The Statue of Liberty in New York City holds ...</td>\n",
        "      <td>0.5</td>\n",
-       "      <td>0.942681</td>\n",
-       "      <td>0.138449</td>\n",
+       "      <td>0.942658</td>\n",
+       "      <td>0.052632</td>\n",
        "      <td>0</td>\n",
        "      <td>1.0</td>\n",
        "    </tr>\n",
@@ -410,36 +410,36 @@
        "3                How did New York City get its name?   \n",
        "4  What is the significance of the Statue of Libe...   \n",
        "\n",
-       "                                              answer  \\\n",
-       "0  \\nThe population of New York City as of 2020 i...   \n",
-       "1  \\nThe borough of Manhattan has the highest pop...   \n",
-       "2  \\nNew York City is a major global economic cen...   \n",
-       "3  \\nNew York City was named in honor of the Duke...   \n",
-       "4  \\nThe Statue of Liberty is a symbol of the Uni...   \n",
-       "\n",
        "                                            contexts  \\\n",
        "0  [Aeromedical Staging Squadron, and a military ...   \n",
        "1  [co-extensive with New York County, the boroug...   \n",
        "2  [health care and life sciences, medical techno...   \n",
        "3  [a US$1 billion research and education center ...   \n",
        "4  [(stylized I ❤ NY) is both a logo and a song t...   \n",
        "\n",
+       "                                              answer  \\\n",
+       "0  \\nThe population of New York City as of 2020 i...   \n",
+       "1  \\nThe borough of Manhattan has the highest pop...   \n",
+       "2  \\nNew York City is a major global economic cen...   \n",
+       "3  \\nNew York City was named in honor of the Duke...   \n",
+       "4  \\nThe Statue of Liberty is a symbol of the Uni...   \n",
+       "\n",
        "                                       ground_truths  faithfulness  \\\n",
        "0                                        [8,804,000]           1.0   \n",
        "1                                           [Queens]           0.0   \n",
        "2  [New York City's economic significance is vast...           1.0   \n",
        "3  [New York City got its name when it came under...           1.0   \n",
        "4  [The Statue of Liberty in New York City holds ...           0.5   \n",
        "\n",
-       "   answer_relevancy  context_ relevancy  harmfulness  context_recall  \n",
-       "0          0.999999            0.161345            0             1.0  \n",
-       "1          0.998528            0.046342            0             1.0  \n",
-       "2          0.903937            0.407880            0             1.0  \n",
-       "3          0.929809            0.057195            0             1.0  \n",
-       "4          0.942681            0.138449            0             1.0  "
+       "   answer_relevancy  context_relevancy  harmfulness  context_recall  \n",
+       "0          1.000000           0.320000            0             1.0  \n",
+       "1          0.998525           0.038462            0             0.9  \n",
+       "2          0.911303           0.384615            0             1.0  \n",
+       "3          0.929792           0.407407            0             1.0  \n",
+       "4          0.942658           0.052632            0             1.0  "
       ]
      },
-     "execution_count": 14,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/docs/metrics.md b/docs/metrics.md
@@ -4,7 +4,7 @@
 
 This measures the factual consistency of the generated answer against the given context. This is done using a multi step paradigm that includes creation of statements from the generated answer followed by verifying each of these statements against the context. It is calculated from `answer` and `retrieved context`. The answer is scaled to (0,1) range. Higher the better.
 ```python
-from ragas.metrics.factuality import Faithfulness
+from ragas.metrics.faithfulness import Faithfulness
 faithfulness = Faithfulness()
 
 # Dataset({
@@ -19,22 +19,26 @@ results = faithfulness.score(dataset)
 
 This measures how relevant is the retrieved context to the prompt. This is done using a combination of OpenAI models and cross-encoder models. To improve the score one can try to optimize the amount of information present in the retrieved context. It is calculated from `question` and `retrieved context`. 
 ```python
-from ragas.metrics.context_relevancy import ContextRelevancy
-context_rel = ContextRelevancy(strictness=3)
+from ragas.metrics import ContextRelevancy
+context_relevancy = ContextRelevancy(strictness=3)
+
+# call init_model() to load the models used by this metric
+context_relevancy.init_model()
+
 # Dataset({
 #     features: ['question','contexts'],
 #     num_rows: 25
 # })
 dataset: Dataset
 
-results = context_rel.score(dataset)
+results = context_relevancy.score(dataset)
 ```
 
 ### `Context Recall`
 measures the recall of the retrieved context using annotated answer as ground truth. Annotated answer is taken as proxy for ground truth context. It is calculated from `ground truth` and `retrieved context`.
 
 ```python
-from ragas.metrics.context_recall import ContextRecall
+from ragas.metrics import ContextRecall
 context_recall = ContextRecall()
 # Dataset({
 #     features: ['contexts','ground_truths'],
@@ -50,8 +54,12 @@ results = context_recall.score(dataset)
 
 This measures how relevant is the generated answer to the prompt. If the generated answer is incomplete or contains redundant information the score will be low. This is quantified by working out the chance of an LLM generating the given question using the generated answer. It is calculated from `question` and `answer`. Values range (0,1), higher the better.
 ```python
-from ragas.metrics.answer_relevancy import AnswerRelevancy
+from ragas.metrics import AnswerRelevancy
 answer_relevancy = AnswerRelevancy()
+
+# call init_model() to load the models used by this metric
+answer_relevancy.init_model()
+
 # Dataset({
 #     features: ['question','answer'],
 #     num_rows: 25
@@ -74,7 +82,6 @@ from ragas.metrics.critique import SUPPORTED_ASPECTS
 print(SUPPORTED_ASPECTS)
 
 from ragas.metrics.critique import conciseness
-from ragas
 # Dataset({
 #     features: ['question','answer'],
 #     num_rows: 25
@@ -88,6 +95,7 @@ results = conciseness.score(dataset)
 from ragas.metrics.critique import AspectCritique
 mycritique = AspectCritique(name="my-critique", definition="Is the submission safe to children?", strictness=2)
 
+results = mycritique.score(dataset)
 ```  
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -5,7 +5,8 @@ dependencies = [
     "transformers",
     "sentence-transformers",
     "datasets",
-    "langchain>=0.0.218",
+    "tiktoken",
+    "langchain>=0.0.288",
     "openai",
     "pydantic<2.0",
     "pysbd>=0.3.4",
diff --git a/src/ragas/llama_index/evaluation.py b/src/ragas/llama_index/evaluation.py
@@ -3,9 +3,9 @@
 import typing as t
 
 from datasets import Dataset
-from rich.repr import Result
 
 from ragas import evaluate as ragas_evaluate
+from ragas.evaluation import Result
 from ragas.metrics.base import Metric
 
 if t.TYPE_CHECKING:
diff --git a/src/ragas/metrics/__init__.py b/src/ragas/metrics/__init__.py
@@ -2,7 +2,9 @@
 from ragas.metrics.context_recall import ContextRecall, context_recall
 from ragas.metrics.context_relevance import ContextRelevancy, context_relevancy
 from ragas.metrics.critique import AspectCritique
-from ragas.metrics.faithfulnes import Faithfulness, faithfulness
+from ragas.metrics.faithfulness import Faithfulness, faithfulness
+
+DEFAULT_METRICS = [answer_relevancy, context_relevancy, faithfulness, context_recall]
 
 __all__ = [
     "Faithfulness",
diff --git a/src/ragas/metrics/faithfulness.py b/src/ragas/metrics/faithfulness.py