@@ -15,7 +15,7 @@ def __init__(
1515 num_surrounding_sentences : int = 1 ,
1616 similarity_threshold : float = 0.8 ,
1717 max_chunk_tokens : int = 200 ,
18- min_chunk_tokens : int = 50
18+ min_chunk_tokens : int = 50 ,
1919 ):
2020 self .num_surrounding_sentences = num_surrounding_sentences
2121 self .similarity_threshold = similarity_threshold
@@ -266,7 +266,7 @@ def look_ahead_and_behind_sentences(
266266 next_sentence_is_table_or_figure ,
267267 ) in enumerate (
268268 is_table_or_figure_map [
269- current_sentence_index : current_sentence_index
269+ current_sentence_index : current_sentence_index
270270 + surround_sentences_gap_to_test
271271 ]
272272 ):
@@ -300,8 +300,7 @@ def retrive_current_chunk_at_n(n):
300300 else :
301301 return current_chunk [n ]
302302
303- current_chunk_tokens = self .num_tokens_from_string (
304- " " .join (current_chunk ))
303+ current_chunk_tokens = self .num_tokens_from_string (" " .join (current_chunk ))
305304
306305 if len (current_chunk ) >= 2 and current_chunk_tokens >= self .min_chunk_tokens :
307306 logging .info ("Comparing chunks" )
@@ -403,13 +402,13 @@ def retrieve_current_chunk():
403402 new_is_table_or_figure_map .append (False )
404403 if forwards_direction :
405404 current_chunk = sentences [
406- current_sentence_index : current_sentence_index
405+ current_sentence_index : current_sentence_index
407406 + min_of_distance_to_next_figure_or_num_surrounding_sentences
408407 ]
409408 else :
410409 current_chunk = sentences [
411- current_sentence_index : current_sentence_index
412- - min_of_distance_to_next_figure_or_num_surrounding_sentences : - 1
410+ current_sentence_index : current_sentence_index
411+ - min_of_distance_to_next_figure_or_num_surrounding_sentences : - 1
413412 ]
414413 index += min_of_distance_to_next_figure_or_num_surrounding_sentences
415414 continue
@@ -490,7 +489,7 @@ async def process_semantic_text_chunker(record: dict, text_chunker) -> dict:
490489 logging .error ("Chunking Error: %s" , e )
491490 return {
492491 "recordId" : record ["recordId" ],
493- "data" : {} ,
492+ "data" : None ,
494493 "errors" : [
495494 {
496495 "message" : "Failed to chunk data. Check function app logs for more details of exact failure."
0 commit comments