Skip to content

IndexError for some texts using t5-small #22

@cbjrobertson

Description

@cbjrobertson

For some texts, calling detect_frames with the t5-small model raises a cryptic IndexError instead of returning a result.

MRE:

# The two texts below differ *only* in that text_2 has a trailing period.

text_1 = "Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those"

text_2 = "Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those."

from frame_semantic_transformer import FrameSemanticTransformer
fst_small = FrameSemanticTransformer("small")
fst_base = FrameSemanticTransformer("base")

>>>fst_small.detect_frames(text_1)
>>> DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. I see the car. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to making something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those", trigger_locations=[8, 38, 59, 67, 99, 182, 261, 335], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Goal', text='around')]), FrameResult(name='Likelihood', trigger_location=59, frame_elements=[FrameElementResult(name='Hypothetical_event', text='I'), FrameElementResult(name='Hypothetical_event', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=99, frame_elements=[FrameElementResult(name='Cognizer', text='I'), FrameElementResult(name='Content', text='I')]), FrameResult(name='People_by_age', trigger_location=182, frame_elements=[FrameElementResult(name='Person', text='child')]), FrameResult(name='Kinship', trigger_location=261, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='Likelihood', trigger_location=335, frame_elements=[FrameElementResult(name='Hypothetical_event', text='people a lot of people'), FrameElementResult(name='Hypothetical_event', text='to go out on those')])])

>>>fst_small.detect_frames(text_2)
>>>---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
/tmp/ipykernel_1767205/3259689296.py in <module>
----> 1 fst_small.detect_frames(text_2)

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in detect_frames(self, sentence)
    167         base_sentence, trigger_locs = self._identify_triggers(sentence)
    168         # next detect frames for each trigger
--> 169         frames = self._classify_frames(base_sentence, trigger_locs)
    170 
    171         frame_and_locs = [

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in _classify_frames(self, sentence, trigger_locs)
    126             frame_classification_tasks, chunk_size=self.max_batch_size
    127         ):
--> 128             batch_results = self._batch_predict([task.get_input() for task in batch])
    129             for preds, frame_task in zip(
    130                 chunk_list(batch_results, self.predictions_per_sample),

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/FrameSemanticTransformer.py in <listcomp>(.0)
    126             frame_classification_tasks, chunk_size=self.max_batch_size
    127         ):
--> 128             batch_results = self._batch_predict([task.get_input() for task in batch])
    129             for preds, frame_task in zip(
    130                 chunk_list(batch_results, self.predictions_per_sample),

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/data/tasks/FrameClassificationTask.py in get_input(self)
     25 
     26     def get_input(self) -> str:
---> 27         potential_frames = get_possible_frames_for_trigger_bigrams(self.trigger_bigrams)
     28         return f"FRAME {' '.join(potential_frames)} : {self.trigger_labeled_text}"
     29 

~/anaconda3/envs/ktrain_loo/lib/python3.7/site-packages/frame_semantic_transformer/data/tasks/FrameClassificationTask.py in trigger_bigrams(self)
     44         pre_trigger_tokens = self.text[: self.trigger_loc].split()
     45         trigger_and_after_tokens = self.text[self.trigger_loc :].split()
---> 46         trigger = trigger_and_after_tokens[0]
     47         post_trigger_tokens = trigger_and_after_tokens[1:]
     48         bigrams: list[list[str]] = []

IndexError: list index out of range

>>> fst_base.detect_frames(text_1)
>>> DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to make something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those", trigger_locations=[8, 38, 59, 67, 84, 173, 228, 244, 295, 302, 304, 311, 318, 326], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Path', text='around')]), FrameResult(name='Experiencer_focus', trigger_location=59, frame_elements=[FrameElementResult(name='Experiencer', text='I'), FrameElementResult(name='Content', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=84, frame_elements=[FrameElementResult(name='Cognizer', text='I')]), FrameResult(name='Getting', trigger_location=173, frame_elements=[FrameElementResult(name='Recipient', text='a child'), FrameElementResult(name='Theme', text='water')]), FrameResult(name='Stimulus_focus', trigger_location=228, frame_elements=[FrameElementResult(name='Stimulus', text='something')]), FrameResult(name='Kinship', trigger_location=244, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='People', trigger_location=295, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Quantified_mass', trigger_location=302, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), 
FrameResult(name='Quantified_mass', trigger_location=304, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), FrameResult(name='People', trigger_location=311, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Desiring', trigger_location=318, frame_elements=[FrameElementResult(name='Experiencer', text='people'), FrameElementResult(name='Event', text='to go out on those')]), FrameResult(name='Motion', trigger_location=326, frame_elements=[FrameElementResult(name='Theme', text='people'), FrameElementResult(name='Goal', text='out on those')])])

>>> fst_base.detect_frames(text_2)
>>>DetectFramesResult(sentence="Well, I came out and put on these and run around. And uh I like to run. And I don't know, I uh. Yeah they're over there is going to put up and have a party uh. It's a child getting water. And she's she's going to make something nice. That's my father. He goes everywhere on that one. And that's people a lot of people like to go out on those.", trigger_locations=[8, 38, 59, 67, 84, 173, 228, 244, 295, 302, 304, 311, 318, 326], frames=[FrameResult(name='Arriving', trigger_location=8, frame_elements=[FrameElementResult(name='Theme', text='I'), FrameElementResult(name='Goal', text='out')]), FrameResult(name='Self_motion', trigger_location=38, frame_elements=[FrameElementResult(name='Self_mover', text='I'), FrameElementResult(name='Path', text='around')]), FrameResult(name='Experiencer_focus', trigger_location=59, frame_elements=[FrameElementResult(name='Experiencer', text='I'), FrameElementResult(name='Content', text='to run')]), FrameResult(name='Self_motion', trigger_location=67, frame_elements=[FrameElementResult(name='Self_mover', text='I')]), FrameResult(name='Awareness', trigger_location=84, frame_elements=[FrameElementResult(name='Cognizer', text='I')]), FrameResult(name='Getting', trigger_location=173, frame_elements=[FrameElementResult(name='Recipient', text='a child'), FrameElementResult(name='Theme', text='water')]), FrameResult(name='Stimulus_focus', trigger_location=228, frame_elements=[FrameElementResult(name='Stimulus', text='something')]), FrameResult(name='Kinship', trigger_location=244, frame_elements=[FrameElementResult(name='Ego', text='my'), FrameElementResult(name='Alter', text='father')]), FrameResult(name='People', trigger_location=295, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Quantified_mass', trigger_location=302, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), 
FrameResult(name='Quantified_mass', trigger_location=304, frame_elements=[FrameElementResult(name='Quantity', text='a lot'), FrameElementResult(name='Individuals', text='of people')]), FrameResult(name='People', trigger_location=311, frame_elements=[FrameElementResult(name='Person', text='people')]), FrameResult(name='Desiring', trigger_location=318, frame_elements=[FrameElementResult(name='Experiencer', text='people'), FrameElementResult(name='Event', text='to go out on those')]), FrameResult(name='Motion', trigger_location=326, frame_elements=[FrameElementResult(name='Theme', text='people'), FrameElementResult(name='Source', text='out'), FrameElementResult(name='Goal', text='on those')])])

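Seems like a bug to me! The small model handles text_1 but crashes on text_2, which differs only by the trailing period, while the base model handles both texts. Judging by the traceback, nothing but whitespace (or nothing at all) remains in the text from one of the trigger locations onward, so trigger_and_after_tokens is empty and indexing [0] fails.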

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions