Skip to content

Commit c9259e7

Browse files
authored
Merge pull request #66 from benlipkin/BigCodeFIM
added FIM tokens for bigcode/large-model
2 parents 3ad3b8d + 8160f5d commit c9259e7

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

lm_eval/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ def _make_infill_prompt(self, prefix, suffix):
             return f"{prefix}<|mask:0|>{suffix}<|mask:0|>"
         elif model_id in ["bigcode/santacoder"]:
             return f"<fim-prefix>{prefix}<fim-suffix>{suffix}<fim-middle>"
+        elif model_id in ["bigcode/large-model"]:
+            return f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"
         else:
             raise ValueError(f"Infilling not yet supported for: {model_id}")
 
@@ -158,6 +160,10 @@ def parse_infill(code, tokenizer):
         prefix, rest = code.split("<fim-suffix>", 1)
         suffix, infill = rest.split("<fim-middle>", 1)
         infill = infill.split("<|endoftext|>")[0]
+    elif model_id in ["bigcode/large-model"]:
+        prefix, rest = code.split("<fim_suffix>", 1)
+        suffix, infill = rest.split("<fim_middle>", 1)
+        infill = infill.split("<|endoftext|>")[0]
     else:
         raise ValueError(f"Infilling not yet supported for: {model_id}")
     for k, v in tokenizer.special_tokens_map.items():

0 commit comments

Comments
 (0)