|
125 | 125 | }, |
126 | 126 | { |
127 | 127 | "cell_type": "code", |
128 | | - "execution_count": 11, |
| 128 | + "execution_count": 5, |
129 | 129 | "id": "751dc988", |
130 | 130 | "metadata": {}, |
131 | 131 | "outputs": [], |
|
159 | 159 | }, |
160 | 160 | { |
161 | 161 | "cell_type": "code", |
162 | | - "execution_count": 6, |
| 162 | + "execution_count": 7, |
163 | 163 | "id": "9875132a", |
164 | 164 | "metadata": {}, |
165 | 165 | "outputs": [], |
|
191 | 191 | }, |
192 | 192 | { |
193 | 193 | "cell_type": "code", |
194 | | - "execution_count": 12, |
| 194 | + "execution_count": 8, |
195 | 195 | "id": "05633cc2", |
196 | 196 | "metadata": {}, |
197 | 197 | "outputs": [ |
|
206 | 206 | "name": "stderr", |
207 | 207 | "output_type": "stream", |
208 | 208 | "text": [ |
209 | | - "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [01:12<00:00, 72.16s/it]\n" |
| 209 | + "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [01:00<00:00, 60.01s/it]\n" |
210 | 210 | ] |
211 | 211 | }, |
212 | 212 | { |
|
220 | 220 | "name": "stderr", |
221 | 221 | "output_type": "stream", |
222 | 222 | "text": [ |
223 | | - "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:12<00:00, 12.74s/it]\n" |
| 223 | + "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:11<00:00, 11.67s/it]\n" |
224 | 224 | ] |
225 | 225 | }, |
226 | 226 | { |
227 | 227 | "name": "stdout", |
228 | 228 | "output_type": "stream", |
229 | 229 | "text": [ |
230 | | - "evaluating with [context_ relevancy]\n" |
| 230 | + "evaluating with [context_relevancy]\n" |
231 | 231 | ] |
232 | 232 | }, |
233 | 233 | { |
234 | 234 | "name": "stderr", |
235 | 235 | "output_type": "stream", |
236 | 236 | "text": [ |
237 | | - "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:39<00:00, 39.72s/it]\n" |
| 237 | + "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:46<00:00, 46.11s/it]\n" |
238 | 238 | ] |
239 | 239 | }, |
240 | 240 | { |
|
248 | 248 | "name": "stderr", |
249 | 249 | "output_type": "stream", |
250 | 250 | "text": [ |
251 | | - "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:20<00:00, 20.26s/it]\n" |
| 251 | + "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:13<00:00, 13.82s/it]\n" |
252 | 252 | ] |
253 | 253 | }, |
254 | 254 | { |
|
262 | 262 | "name": "stderr", |
263 | 263 | "output_type": "stream", |
264 | 264 | "text": [ |
265 | | - "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:31<00:00, 31.83s/it]\n" |
| 265 | + "100%|βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ| 1/1 [00:39<00:00, 39.68s/it]\n" |
266 | 266 | ] |
267 | 267 | } |
268 | 268 | ], |
|
274 | 274 | }, |
275 | 275 | { |
276 | 276 | "cell_type": "code", |
277 | | - "execution_count": 13, |
| 277 | + "execution_count": 9, |
278 | 278 | "id": "f927a943", |
279 | 279 | "metadata": {}, |
280 | 280 | "outputs": [ |
281 | 281 | { |
282 | 282 | "name": "stdout", |
283 | 283 | "output_type": "stream", |
284 | 284 | "text": [ |
285 | | - "{'ragas_score': 0.4150, 'faithfulness': 0.7000, 'answer_relevancy': 0.9550, 'context_ relevancy': 0.1622, 'harmfulness': 0.0000, 'context_recall': 1.0000}\n" |
| 285 | + "{'ragas_score': 0.5228, 'faithfulness': 0.7000, 'answer_relevancy': 0.9565, 'context_relevancy': 0.2406, 'harmfulness': 0.0000, 'context_recall': 0.9800}\n" |
286 | 286 | ] |
287 | 287 | } |
288 | 288 | ], |
|
301 | 301 | }, |
302 | 302 | { |
303 | 303 | "cell_type": "code", |
304 | | - "execution_count": 14, |
| 304 | + "execution_count": 10, |
305 | 305 | "id": "b96311e2", |
306 | 306 | "metadata": {}, |
307 | 307 | "outputs": [ |
|
327 | 327 | " <tr style=\"text-align: right;\">\n", |
328 | 328 | " <th></th>\n", |
329 | 329 | " <th>question</th>\n", |
330 | | - " <th>answer</th>\n", |
331 | 330 | " <th>contexts</th>\n", |
| 331 | + " <th>answer</th>\n", |
332 | 332 | " <th>ground_truths</th>\n", |
333 | 333 | " <th>faithfulness</th>\n", |
334 | 334 | " <th>answer_relevancy</th>\n", |
335 | | - " <th>context_ relevancy</th>\n", |
| 335 | + " <th>context_relevancy</th>\n", |
336 | 336 | " <th>harmfulness</th>\n", |
337 | 337 | " <th>context_recall</th>\n", |
338 | 338 | " </tr>\n", |
|
341 | 341 | " <tr>\n", |
342 | 342 | " <th>0</th>\n", |
343 | 343 | " <td>What is the population of New York City as of ...</td>\n", |
344 | | - " <td>\\nThe population of New York City as of 2020 i...</td>\n", |
345 | 344 | " <td>[Aeromedical Staging Squadron, and a military ...</td>\n", |
| 345 | + " <td>\\nThe population of New York City as of 2020 i...</td>\n", |
346 | 346 | " <td>[8,804,000]</td>\n", |
347 | 347 | " <td>1.0</td>\n", |
348 | | - " <td>0.999999</td>\n", |
349 | | - " <td>0.161345</td>\n", |
| 348 | + " <td>1.000000</td>\n", |
| 349 | + " <td>0.320000</td>\n", |
350 | 350 | " <td>0</td>\n", |
351 | 351 | " <td>1.0</td>\n", |
352 | 352 | " </tr>\n", |
353 | 353 | " <tr>\n", |
354 | 354 | " <th>1</th>\n", |
355 | 355 | " <td>Which borough of New York City has the highest...</td>\n", |
356 | | - " <td>\\nThe borough of Manhattan has the highest pop...</td>\n", |
357 | 356 | " <td>[co-extensive with New York County, the boroug...</td>\n", |
| 357 | + " <td>\\nThe borough of Manhattan has the highest pop...</td>\n", |
358 | 358 | " <td>[Queens]</td>\n", |
359 | 359 | " <td>0.0</td>\n", |
360 | | - " <td>0.998528</td>\n", |
361 | | - " <td>0.046342</td>\n", |
| 360 | + " <td>0.998525</td>\n", |
| 361 | + " <td>0.038462</td>\n", |
362 | 362 | " <td>0</td>\n", |
363 | | - " <td>1.0</td>\n", |
| 363 | + " <td>0.9</td>\n", |
364 | 364 | " </tr>\n", |
365 | 365 | " <tr>\n", |
366 | 366 | " <th>2</th>\n", |
367 | 367 | " <td>What is the economic significance of New York ...</td>\n", |
368 | | - " <td>\\nNew York City is a major global economic cen...</td>\n", |
369 | 368 | " <td>[health care and life sciences, medical techno...</td>\n", |
| 369 | + " <td>\\nNew York City is a major global economic cen...</td>\n", |
370 | 370 | " <td>[New York City's economic significance is vast...</td>\n", |
371 | 371 | " <td>1.0</td>\n", |
372 | | - " <td>0.903937</td>\n", |
373 | | - " <td>0.407880</td>\n", |
| 372 | + " <td>0.911303</td>\n", |
| 373 | + " <td>0.384615</td>\n", |
374 | 374 | " <td>0</td>\n", |
375 | 375 | " <td>1.0</td>\n", |
376 | 376 | " </tr>\n", |
377 | 377 | " <tr>\n", |
378 | 378 | " <th>3</th>\n", |
379 | 379 | " <td>How did New York City get its name?</td>\n", |
380 | | - " <td>\\nNew York City was named in honor of the Duke...</td>\n", |
381 | 380 | " <td>[a US$1 billion research and education center ...</td>\n", |
| 381 | + " <td>\\nNew York City was named in honor of the Duke...</td>\n", |
382 | 382 | " <td>[New York City got its name when it came under...</td>\n", |
383 | 383 | " <td>1.0</td>\n", |
384 | | - " <td>0.929809</td>\n", |
385 | | - " <td>0.057195</td>\n", |
| 384 | + " <td>0.929792</td>\n", |
| 385 | + " <td>0.407407</td>\n", |
386 | 386 | " <td>0</td>\n", |
387 | 387 | " <td>1.0</td>\n", |
388 | 388 | " </tr>\n", |
389 | 389 | " <tr>\n", |
390 | 390 | " <th>4</th>\n", |
391 | 391 | " <td>What is the significance of the Statue of Libe...</td>\n", |
392 | | - " <td>\\nThe Statue of Liberty is a symbol of the Uni...</td>\n", |
393 | 392 | " <td>[(stylized I β€ NY) is both a logo and a song t...</td>\n", |
| 393 | + " <td>\\nThe Statue of Liberty is a symbol of the Uni...</td>\n", |
394 | 394 | " <td>[The Statue of Liberty in New York City holds ...</td>\n", |
395 | 395 | " <td>0.5</td>\n", |
396 | | - " <td>0.942681</td>\n", |
397 | | - " <td>0.138449</td>\n", |
| 396 | + " <td>0.942658</td>\n", |
| 397 | + " <td>0.052632</td>\n", |
398 | 398 | " <td>0</td>\n", |
399 | 399 | " <td>1.0</td>\n", |
400 | 400 | " </tr>\n", |
|
410 | 410 | "3 How did New York City get its name? \n", |
411 | 411 | "4 What is the significance of the Statue of Libe... \n", |
412 | 412 | "\n", |
413 | | - " answer \\\n", |
414 | | - "0 \\nThe population of New York City as of 2020 i... \n", |
415 | | - "1 \\nThe borough of Manhattan has the highest pop... \n", |
416 | | - "2 \\nNew York City is a major global economic cen... \n", |
417 | | - "3 \\nNew York City was named in honor of the Duke... \n", |
418 | | - "4 \\nThe Statue of Liberty is a symbol of the Uni... \n", |
419 | | - "\n", |
420 | 413 | " contexts \\\n", |
421 | 414 | "0 [Aeromedical Staging Squadron, and a military ... \n", |
422 | 415 | "1 [co-extensive with New York County, the boroug... \n", |
423 | 416 | "2 [health care and life sciences, medical techno... \n", |
424 | 417 | "3 [a US$1 billion research and education center ... \n", |
425 | 418 | "4 [(stylized I β€ NY) is both a logo and a song t... \n", |
426 | 419 | "\n", |
| 420 | + " answer \\\n", |
| 421 | + "0 \\nThe population of New York City as of 2020 i... \n", |
| 422 | + "1 \\nThe borough of Manhattan has the highest pop... \n", |
| 423 | + "2 \\nNew York City is a major global economic cen... \n", |
| 424 | + "3 \\nNew York City was named in honor of the Duke... \n", |
| 425 | + "4 \\nThe Statue of Liberty is a symbol of the Uni... \n", |
| 426 | + "\n", |
427 | 427 | " ground_truths faithfulness \\\n", |
428 | 428 | "0 [8,804,000] 1.0 \n", |
429 | 429 | "1 [Queens] 0.0 \n", |
430 | 430 | "2 [New York City's economic significance is vast... 1.0 \n", |
431 | 431 | "3 [New York City got its name when it came under... 1.0 \n", |
432 | 432 | "4 [The Statue of Liberty in New York City holds ... 0.5 \n", |
433 | 433 | "\n", |
434 | | - " answer_relevancy context_ relevancy harmfulness context_recall \n", |
435 | | - "0 0.999999 0.161345 0 1.0 \n", |
436 | | - "1 0.998528 0.046342 0 1.0 \n", |
437 | | - "2 0.903937 0.407880 0 1.0 \n", |
438 | | - "3 0.929809 0.057195 0 1.0 \n", |
439 | | - "4 0.942681 0.138449 0 1.0 " |
| 434 | + " answer_relevancy context_relevancy harmfulness context_recall \n", |
| 435 | + "0 1.000000 0.320000 0 1.0 \n", |
| 436 | + "1 0.998525 0.038462 0 0.9 \n", |
| 437 | + "2 0.911303 0.384615 0 1.0 \n", |
| 438 | + "3 0.929792 0.407407 0 1.0 \n", |
| 439 | + "4 0.942658 0.052632 0 1.0 " |
440 | 440 | ] |
441 | 441 | }, |
442 | | - "execution_count": 14, |
| 442 | + "execution_count": 10, |
443 | 443 | "metadata": {}, |
444 | 444 | "output_type": "execute_result" |
445 | 445 | } |
|
0 commit comments