We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent a2000c3 · commit cfb9ea9 (Copy full SHA for cfb9ea9)
fms_mo/utils/calib_data.py
@@ -26,7 +26,7 @@
26
27
# Third Party
28
from datasets import load_dataset, load_from_disk
29
-from transformers import AutoTokenizer, BatchEncoding
+from transformers import BatchEncoding
30
import datasets
31
import torch
32
@@ -260,12 +260,12 @@ def get_self_instruct_starcoder(
260
}
261
for k in range(nsamples):
262
tokenized = tokenizer(
263
- cr_dataset[k]["output"], return_tensors="pt",
+ cr_dataset[k]["output"], return_tensors="pt",
264
padding="max_length", max_length = seqlen
265
)
266
trainloader["input_ids"][k] = tokenized.input_ids.squeeze(0)
267
trainloader["attention_mask"][k] = tokenized.attention_mask.squeeze(0)
268
-
+
269
return trainloader, eval_dataset
270
271
0 commit comments