@@ -78,7 +78,13 @@ def __getitem__(self, idx: int) -> Tuple[str, str]:
7878
7979 tokens = row ["code_tokens" ]
8080 body_tokens = tokens [tokens .index (fn_name ) + 2 :]
81- fn_body_tokens = body_tokens [body_tokens .index ("{" ) + 1 : len (body_tokens ) - 1 ]
81+ try :
82+ fn_body_tokens = body_tokens [
83+ body_tokens .index ("{" ) + 1 : len (body_tokens ) - 1
84+ ]
85+ except ValueError as ve : # '{' might be missing
86+ logging .error ("'%s' fn body extraction failed: %s" , body_tokens , ve )
87+ fn_body_tokens = None
8288
8389 return (fn_name , fn_body , fn_body_tokens )
8490
@@ -91,6 +97,7 @@ def __len__(self) -> int:
9197from functools import lru_cache
9298from typing import List
9399
100+
94101def split_camelcase (camel_case_identifier : str ) -> List [str ]:
95102 """
96103 Split camelCase identifiers.
@@ -158,7 +165,13 @@ def main(args: Namespace) -> None:
158165 for fn_name , fn_body , fn_body_tokens in dataset :
159166 if not fn_name or not fn_body :
160167 continue
161- src = " " .join (fn_body_tokens ) if args .token_level_sources else fn_body
168+
169+ if args .token_level_sources :
170+ if not fn_body_tokens :
171+ continue
172+ src = " " .join (fn_body_tokens ).replace ("\n " , args .newline )
173+ else :
174+ src = fn_body
162175
163176 if args .word_level_targets :
164177 tgt = fn_name
0 commit comments