|
12 | 12 | "- [Completion LLMs Supported](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.llms)\n", |
13 | 13 | "- [Chat based LLMs Supported](https://api.python.langchain.com/en/latest/api_reference.html#module-langchain.chat_models)\n", |
14 | 14 | "\n", |
15 | | - "This guide will show you how to use another or LLM API for evaluation." |
| 15 | + "This guide will show you how to use another LLM API for evaluation.\n", |
| 16 | + "\n", |
| 17 | + "> **Note**: If you're looking to use Azure OpenAI for evaluation, check out [this guide](./quickstart-azure-openai.ipynb)" |
16 | 18 | ] |
17 | 19 | }, |
18 | 20 | { |
|
29 | 31 | }, |
30 | 32 | { |
31 | 33 | "cell_type": "code", |
32 | | - "execution_count": 5, |
| 34 | + "execution_count": null, |
33 | 35 | "id": "a6d96660", |
34 | 36 | "metadata": {}, |
35 | 37 | "outputs": [], |
|
57 | 59 | "id": "f1fdb48b", |
58 | 60 | "metadata": {}, |
59 | 61 | "source": [ |
60 | | - "Now initialise `Faithfulness` with `gpt4`" |
| 62 | + "Now let's replace the `llm` used in `faithfulness` with `gpt4`" |
61 | 63 | ] |
62 | 64 | }, |
63 | 65 | { |
64 | 66 | "cell_type": "code", |
65 | | - "execution_count": 9, |
| 67 | + "execution_count": 2, |
66 | 68 | "id": "307321ed", |
67 | 69 | "metadata": {}, |
68 | 70 | "outputs": [], |
69 | 71 | "source": [ |
70 | | - "from ragas.metrics import Faithfulness\n", |
| 72 | + "from ragas.metrics import faithfulness\n", |
71 | 73 | "\n", |
72 | | - "faithfulness_gpt4 = Faithfulness(name=\"faithfulness_gpt4\", llm=gpt4, batch_size=3)" |
| 74 | + "faithfulness.llm = gpt4" |
73 | 75 | ] |
74 | 76 | }, |
75 | 77 | { |
76 | 78 | "cell_type": "markdown", |
77 | 79 | "id": "1930dd49", |
78 | 80 | "metadata": {}, |
79 | 81 | "source": [ |
80 | | - "That's it!\n", |
| 82 | + "That's it! faithfulness will now be using GPT-4 under the hood for evaluations.\n", |
81 | 83 | "\n", |
82 | 84 | "Now let's run the evaluations using the example from [quickstart](../quickstart.ipynb)." |
83 | 85 | ] |
84 | 86 | }, |
85 | 87 | { |
86 | 88 | "cell_type": "code", |
87 | | - "execution_count": 6, |
| 89 | + "execution_count": 3, |
88 | 90 | "id": "62c0eadb", |
89 | 91 | "metadata": {}, |
90 | 92 | "outputs": [ |
|
98 | 100 | { |
99 | 101 | "data": { |
100 | 102 | "application/vnd.jupyter.widget-view+json": { |
101 | | - "model_id": "c55f09ffe1094e6190c255c09c0eb141", |
| 103 | + "model_id": "9fb581d4057d4e70a0b70830b2f5f487", |
102 | 104 | "version_major": 2, |
103 | 105 | "version_minor": 0 |
104 | 106 | }, |
|
120 | 122 | "})" |
121 | 123 | ] |
122 | 124 | }, |
123 | | - "execution_count": 6, |
| 125 | + "execution_count": 3, |
124 | 126 | "metadata": {}, |
125 | 127 | "output_type": "execute_result" |
126 | 128 | } |
|
135 | 137 | }, |
136 | 138 | { |
137 | 139 | "cell_type": "code", |
138 | | - "execution_count": 10, |
| 140 | + "execution_count": 5, |
139 | 141 | "id": "c4396f6e", |
140 | 142 | "metadata": {}, |
141 | 143 | "outputs": [ |
142 | 144 | { |
143 | 145 | "name": "stdout", |
144 | 146 | "output_type": "stream", |
145 | 147 | "text": [ |
146 | | - "evaluating with [faithfulness_gpt4]\n" |
| 148 | + "evaluating with [faithfulness]\n" |
147 | 149 | ] |
148 | 150 | }, |
149 | 151 | { |
150 | 152 | "name": "stderr", |
151 | 153 | "output_type": "stream", |
152 | 154 | "text": [ |
153 | | - "100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 10/10 [15:38<00:00, 93.84s/it]\n" |
| 155 | + "100%|ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 2/2 [22:28<00:00, 674.38s/it]\n" |
154 | 156 | ] |
155 | 157 | }, |
156 | 158 | { |
157 | 159 | "data": { |
158 | 160 | "text/plain": [ |
159 | | - "{'faithfulness_gpt4': 0.6594}" |
| 161 | + "{'faithfulness': 0.7237}" |
160 | 162 | ] |
161 | 163 | }, |
162 | | - "execution_count": 10, |
| 164 | + "execution_count": 5, |
163 | 165 | "metadata": {}, |
164 | 166 | "output_type": "execute_result" |
165 | 167 | } |
|
168 | 170 | "# evaluate\n", |
169 | 171 | "from ragas import evaluate\n", |
170 | 172 | "\n", |
171 | | - "result = evaluate(fiqa_eval[\"baseline\"], metrics=[faithfulness_gpt4])\n", |
| 173 | + "result = evaluate(fiqa_eval[\"baseline\"], metrics=[faithfulness])\n", |
172 | 174 | "\n", |
173 | 175 | "result" |
174 | 176 | ] |
|
0 commit comments