Skip to content

Commit 48ae599

Browse files
authored
Minor changes (#38)
* added readme
* added how to use
* added metrics to docs
* update readme
* set model_max_length
* fix import paths
1 parent ee6a295 commit 48ae599

File tree

2 files changed

+35
-39
lines changed

2 files changed

+35
-39
lines changed

experiments/assesments/metrics_assesments.ipynb

Lines changed: 34 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,19 @@
3232
},
3333
{
3434
"cell_type": "code",
35-
"execution_count": 62,
35+
"execution_count": 1,
3636
"id": "7bfb2480",
3737
"metadata": {},
38-
"outputs": [],
38+
"outputs": [
39+
{
40+
"name": "stderr",
41+
"output_type": "stream",
42+
"text": [
43+
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
44+
" from .autonotebook import tqdm as notebook_tqdm\n"
45+
]
46+
}
47+
],
3948
"source": [
4049
"import json\n",
4150
"from datasets import load_dataset\n",
@@ -55,7 +64,7 @@
5564
"metadata": {},
5665
"outputs": [],
5766
"source": [
58-
"os.chdir(\"/Users/shahules/belar/\")"
67+
"os.chdir('/Users/shahules/belar/src/')"
5968
]
6069
},
6170
{
@@ -135,7 +144,7 @@
135144
},
136145
{
137146
"cell_type": "code",
138-
"execution_count": 129,
147+
"execution_count": 7,
139148
"id": "f9f4280e",
140149
"metadata": {},
141150
"outputs": [
@@ -144,7 +153,7 @@
144153
"output_type": "stream",
145154
"text": [
146155
"Found cached dataset parquet (/Users/shahules/.cache/huggingface/datasets/explodinggradients___parquet/explodinggradients--ragas-wikiqa-5b5116e5cb909aca/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
147-
"100%|█| 1/1 [00:00<00:00, 58.\n"
156+
"100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 242.78it/s]\n"
148157
]
149158
}
150159
],
@@ -162,7 +171,7 @@
162171
},
163172
{
164173
"cell_type": "code",
165-
"execution_count": 153,
174+
"execution_count": 8,
166175
"id": "eca20daf",
167176
"metadata": {},
168177
"outputs": [],
@@ -184,7 +193,7 @@
184193
},
185194
{
186195
"cell_type": "code",
187-
"execution_count": 8,
196+
"execution_count": 9,
188197
"id": "f3e35532",
189198
"metadata": {},
190199
"outputs": [],
@@ -216,7 +225,7 @@
216225
},
217226
{
218227
"cell_type": "code",
219-
"execution_count": 9,
228+
"execution_count": 10,
220229
"id": "335081e3",
221230
"metadata": {},
222231
"outputs": [],
@@ -252,7 +261,7 @@
252261
},
253262
{
254263
"cell_type": "code",
255-
"execution_count": 18,
264+
"execution_count": 11,
256265
"id": "b2642e5b",
257266
"metadata": {},
258267
"outputs": [],
@@ -267,7 +276,7 @@
267276
},
268277
{
269278
"cell_type": "code",
270-
"execution_count": 19,
279+
"execution_count": 13,
271280
"id": "26ca4af4",
272281
"metadata": {},
273282
"outputs": [
@@ -284,7 +293,7 @@
284293
"0"
285294
]
286295
},
287-
"execution_count": 19,
296+
"execution_count": 13,
288297
"metadata": {},
289298
"output_type": "execute_result"
290299
}
@@ -305,7 +314,7 @@
305314
},
306315
{
307316
"cell_type": "code",
308-
"execution_count": null,
317+
"execution_count": 14,
309318
"id": "ca1c56d6",
310319
"metadata": {},
311320
"outputs": [],
@@ -327,7 +336,7 @@
327336
},
328337
{
329338
"cell_type": "code",
330-
"execution_count": null,
339+
"execution_count": 15,
331340
"id": "cd7fed9c",
332341
"metadata": {},
333342
"outputs": [],
@@ -343,7 +352,7 @@
343352
},
344353
{
345354
"cell_type": "code",
346-
"execution_count": null,
355+
"execution_count": 16,
347356
"id": "35113558",
348357
"metadata": {},
349358
"outputs": [],
@@ -354,7 +363,7 @@
354363
},
355364
{
356365
"cell_type": "code",
357-
"execution_count": 16,
366+
"execution_count": 17,
358367
"id": "4e82d0df",
359368
"metadata": {},
360369
"outputs": [
@@ -368,10 +377,10 @@
368377
{
369378
"data": {
370379
"text/plain": [
371-
"3.514920235612768"
380+
"3.5533440372846865"
372381
]
373382
},
374-
"execution_count": 16,
383+
"execution_count": 17,
375384
"metadata": {},
376385
"output_type": "execute_result"
377386
}
@@ -399,40 +408,27 @@
399408
},
400409
{
401410
"cell_type": "code",
402-
"execution_count": 124,
411+
"execution_count": 13,
403412
"id": "cc263805",
404413
"metadata": {},
405414
"outputs": [],
406415
"source": [
407-
"from experimental.relevance import QGen"
416+
"from ragas.metrics.answer_relevance import QGen"
408417
]
409418
},
410419
{
411420
"cell_type": "code",
412-
"execution_count": 125,
421+
"execution_count": 14,
413422
"id": "38deaf06",
414423
"metadata": {},
415-
"outputs": [
416-
{
417-
"name": "stderr",
418-
"output_type": "stream",
419-
"text": [
420-
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
421-
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
422-
"- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
423-
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
424-
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
425-
" warnings.warn(\n"
426-
]
427-
}
428-
],
424+
"outputs": [],
429425
"source": [
430426
"t5_qgen = QGen(\"t5-base\", \"cpu\")"
431427
]
432428
},
433429
{
434430
"cell_type": "code",
435-
"execution_count": 126,
431+
"execution_count": 15,
436432
"id": "45942810",
437433
"metadata": {},
438434
"outputs": [],
@@ -457,7 +453,7 @@
457453
},
458454
{
459455
"cell_type": "code",
460-
"execution_count": 127,
456+
"execution_count": 16,
461457
"id": "ab00e4fe",
462458
"metadata": {},
463459
"outputs": [],
@@ -522,12 +518,12 @@
522518
},
523519
{
524520
"cell_type": "code",
525-
"execution_count": 23,
521+
"execution_count": 17,
526522
"id": "b6d76ae2",
527523
"metadata": {},
528524
"outputs": [],
529525
"source": [
530-
"## import cross encoder"
526+
"from ragas.metrics.context_relevance import context_relavancy"
531527
]
532528
},
533529
{

src/ragas/metrics/answer_relevance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
class QGen:
2525
def __init__(self, model_name: str, device: str) -> None:
2626
config = AutoConfig.from_pretrained(model_name)
27-
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
27+
self.tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
2828
if self.tokenizer.pad_token is None:
2929
self.tokenizer.pad_token = "[PAD]"
3030
architecture = np.intersect1d(

0 commit comments

Comments (0)