IndexError for some texts using t5-small

For some texts, when using the t5-small model, `detect_frames` raises a cryptic `IndexError`.

Minimal reproducible example (MRE):

```
#the two texts below differ *only* in that text_2 has a trailing period

text_1 = "Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those"

text_2 = "Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those."

from frame_semantic_transformer import FrameSemanticTransformer
fst_small = FrameSemanticTransformer("small")
fst_base = FrameSemanticTransformer("base")

>>>fst_small.detect_frames(text_1)
>>> DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those", trigger_locations=[8, 38, 59, 67, 99, 182, 261, 335], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Goal', text='around')]), FrameResult(name='Likelihood', trigger_location=59, frame_elements=[FrameElementResult(name='Hypothetical_event', text='I'), FrameElementResult(name='Hypothetical_event', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=99, frame_elements=[FrameElementResult(name='Cognizer', text='I'), FrameElementResult(name='Content', text='I')]), FrameResult(name='People_by_age', trigger_location=182, frame_elements=[FrameElementResult(name='Person', text='child')]), FrameResult(name='Kinship', trigger_location=261, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='Likelihood', trigger_location=335, frame_elements=[FrameElementResult(name='Hypothetical_event', text='people a lot of people'), FrameElementResult(name='Hypothetical_event', text='to go out on those')])])

>>>fst_small.detect_frames(text_2)
>>>---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
/tmp/ipykernel_1767205/3259689296.py in <module>
----> 1 fst_small.detect_frames(text_2)

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in detect_frames(self, sentence)
    167         base_sentence, trigger_locs = self._identify_triggers(sentence)
    168         # next detect frames for each trigger
--> 169         frames = self._classify_frames(base_sentence, trigger_locs)
    170 
    171         frame_and_locs = [

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in _classify_frames(self, sentence, trigger_locs)
    126             frame_classification_tasks, chunk_size=self.max_batch_size
    127         ):
--> 128             batch_results = self._batch_predict([task.get_input() for task in batch])
    129             for preds, frame_task in zip(
    130                 chunk_list(batch_results, self.predictions_per_sample),

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in <listcomp>(.0)
    126             frame_classification_tasks, chunk_size=self.max_batch_size
    127         ):
--> 128             batch_results = self._batch_predict([task.get_input() for task in batch])
    129             for preds, frame_task in zip(
    130                 chunk_list(batch_results, self.predictions_per_sample),

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/data/tasks/FrameClassificationTask.py in get_input(self)
     25 
     26     def get_input(self) -> str:
---> 27         potential_frames = get_possible_frames_for_trigger_bigrams(self.trigger_bigrams)
     28         return f"FRAME {' '.join(potential_frames)} : {self.trigger_labeled_text}"
     29 

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/data/tasks/FrameClassificationTask.py in trigger_bigrams(self)
     44         pre_trigger_tokens = self.text[: self.trigger_loc].split()
     45         trigger_and_after_tokens = self.text[self.trigger_loc :].split()
---> 46         trigger = trigger_and_after_tokens[0]
     47         post_trigger_tokens = trigger_and_after_tokens[1:]
     48         bigrams: list[list[str]] = []

IndexError: list index out of range

>>> fst_base.detect_frames(text_1)
>>> DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to make something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those", trigger_locations=[8, 38, 59, 67, 84, 173, 228, 244, 295, 302, 304, 311, 318, 326], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Path', text='around')]), FrameResult(name='Experiencer_focus', trigger_location=59, frame_elements=[FrameElementResult(name='Experiencer', text='I'), FrameElementResult(name='Content', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=84, frame_elements=[FrameElementResult(name='Cognizer', text='I')]), FrameResult(name='Getting', trigger_location=173, frame_elements=[FrameElementResult(name='Recipient', text='a child'), FrameElementResult(name='Theme', text='water')]), FrameResult(name='Stimulus_focus', trigger_location=228, frame_elements=[FrameElementResult(name='Stimulus', text='something')]), FrameResult(name='Kinship', trigger_location=244, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='People', trigger_location=295, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Quantified_mass', trigger_location=302, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), 
FrameResult(name='Quantified_mass', trigger_location=304, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), FrameResult(name='People', trigger_location=311, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Desiring', trigger_location=318, frame_elements=[FrameElementResult(name='Experiencer', text='people'), FrameElementResult(name='Event', text='to go out on those')]), FrameResult(name='Motion', trigger_location=326, frame_elements=[FrameElementResult(name='Theme', text='people'), FrameElementResult(name='Goal', text='out on those')])])

>>> fst_base.detect_frames(text_2)
>>>DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to make something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those.", trigger_locations=[8, 38, 59, 67, 84, 173, 228, 244, 295, 302, 304, 311, 318, 326], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Path', text='around')]), FrameResult(name='Experiencer_focus', trigger_location=59, frame_elements=[FrameElementResult(name='Experiencer', text='I'), FrameElementResult(name='Content', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=84, frame_elements=[FrameElementResult(name='Cognizer', text='I')]), FrameResult(name='Getting', trigger_location=173, frame_elements=[FrameElementResult(name='Recipient', text='a child'), FrameElementResult(name='Theme', text='water')]), FrameResult(name='Stimulus_focus', trigger_location=228, frame_elements=[FrameElementResult(name='Stimulus', text='something')]), FrameResult(name='Kinship', trigger_location=244, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='People', trigger_location=295, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Quantified_mass', trigger_location=302, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), 
FrameResult(name='Quantified_mass', trigger_location=304, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), FrameResult(name='People', trigger_location=311, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Desiring', trigger_location=318, frame_elements=[FrameElementResult(name='Experiencer', text='people'), FrameElementResult(name='Event', text='to go out on those')]), FrameResult(name='Motion', trigger_location=326, frame_elements=[FrameElementResult(name='Theme', text='people'), FrameElementResult(name='Source', text='out'), FrameElementResult(name='Goal', text='on those')])])
```

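This seems like a bug to me! Judging by the traceback, `trigger_and_after_tokens` is empty in `trigger_bigrams`, i.e. `self.text[self.trigger_loc:]` contains no tokens for one of the trigger locations, so indexing `[0]` fails.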

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

IndexError for some texts using t5-small #22

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

IndexError for some texts using t5-small #22

Description

Metadata

Metadata

Assignees

Labels

Projects

Milestone

Relationships

Development

Issue actions