-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathimpressionist.py
More file actions
248 lines (221 loc) · 13.8 KB
/
impressionist.py
File metadata and controls
248 lines (221 loc) · 13.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import json
from productionist import Precondition, ConditionalViolation
class Impressionist(object):
"""An NLU module for in-game dialogue understanding."""
def __init__(self, game, debug=False):
"""Initialize an Impressionist object."""
self.game = game
self.debug = debug
self.nonterminal_symbols = self._init_parse_json_grammar_specification(
path_to_json_grammar_specification=game.config.path_to_dialogue_nlu_json_grammar_specification
)
@staticmethod
def _init_parse_json_grammar_specification(path_to_json_grammar_specification):
"""Parse a JSON grammar specification exported by Expressionist to instantiate nonterminal symbols."""
# Parse the JSON specification to build a dictionary data structure
symbol_objects = []
grammar_dictionary = json.loads(open(path_to_json_grammar_specification).read())
nonterminal_symbol_specifications = grammar_dictionary['nonterminals']
for tag, nonterminal_symbol_specification in nonterminal_symbol_specifications.iteritems():
raw_markup = nonterminal_symbol_specification['markup']
symbol_object = NonterminalSymbol(tag=tag, raw_markup=raw_markup)
symbol_objects.append(symbol_object)
return symbol_objects
def understand_player_utterance(self, player_utterance, conversation):
"""Understand the player's utterance to return a fully specified line of dialogue."""
# Preprocess the utterance
preprocessed_player_utterance = self._preprocess_player_utterance(raw_player_utterance=player_utterance)
# Using our LSTM RNN, decode the player's utterance into a predicted Expressionist
# trace that could have generated it
predicted_trace = self._decode_player_utterance(player_utterance=preprocessed_player_utterance)
# Instantiate a LineOfDialogue object for this utterance and trace; this object's
# __init__() method will parse the trace to inherit mark-up from the symbols embedded
# within it, and it will also be responsible for furnishing the utterance as a line of
# dialogue that will be deployed in the actual conversation
line_of_dialogue_object = LineOfDialogue(
utterance_that_the_player_composed=player_utterance, predicted_trace=predicted_trace,
conversation=conversation
)
# Return this line object to the Conversation object that called this method
return line_of_dialogue_object
@staticmethod
def _preprocess_player_utterance(raw_player_utterance):
"""Preprocess a player utterance according to the LSTM decoder's expectations."""
# Prepend and append whitespace
preprocessed_player_utterance = " {raw_player_utterance} ".format(raw_player_utterance=raw_player_utterance)
# Separate punctuation with whitespace (to make each their own token)
for punctuation_symbol in (',', ".", "?", "!", "..."):
preprocessed_player_utterance = (
preprocessed_player_utterance.replace(punctuation_symbol, ' {}'.format(punctuation_symbol))
)
return preprocessed_player_utterance
def _decode_player_utterance(self, player_utterance):
"""Using our LSTM model, decode the player utterance into a predicted Expressionist trace."""
# THIS CAN'T BE DONE UNTIL WE SETTLE ON AN ELEGANT WAY OF INCORPORATING TENSORFLOW
# INTO THE TALK OF THE TOWN FRAMEWORK
def furnish_nonterminal_symbols(self, symbol_references):
"""Furnish actual NonterminalSymbol objects for each of the provided symbol references.
@param symbol_references: A list of symbol references (i.e., symbol tags).
"""
actual_symbol_objects = set()
for symbol_reference in symbol_references:
try:
symbol_object = next(s for s in self.nonterminal_symbols if s.tag.lower() == symbol_reference.lower())
actual_symbol_objects.add(symbol_object)
except StopIteration:
raise Exception(
"Impressionist encountered a strange symbol reference: {symbol_ref}".format(
symbol_ref=symbol_reference
)
)
return actual_symbol_objects
class NonterminalSymbol(object):
"""A symbol in an NLU system for in-game dialogue understanding.
This class is very similar to productionist.NonterminalSymbol, but excludes concerns
that pertain only to NLG issues and includes concerns not included in that class that
pertain to NLU concerns. The classes could potentially be merged into one, but my
feeling is that it's easier to have them teased apart (even though they will end up
having code in common).
"""
def __init__(self, tag, raw_markup):
"""Initialize a NonterminalSymbol object."""
self.tag = tag
# Prepare annotation sets that will be populated (as appropriate) by _init_parse_markup()
self.preconditions = set()
self.conditional_violations = set()
self.propositions = set()
self.change_subject_to = '' # Raw Python snippet that when executed will change the subject of conversation
self.moves = set() # The dialogue moves constituted by the delivery of this line
self.speaker_obligations_pushed = set() # Line asserts speaker conversational obligations
self.interlocutor_obligations_pushed = set() # Line asserts interlocutor conversational obligations
self.topics_pushed = set() # Line introduces a new topic of conversation
self.topics_addressed = set() # Line addresses a topic of conversation
self._init_parse_markup(raw_markup=raw_markup)
def __str__(self):
"""Return string representation."""
return '[[{tag}]]'.format(tag=self.tag)
def _init_parse_markup(self, raw_markup):
"""Instantiate and attribute objects for the annotations attributed to this symbol."""
for tagset in raw_markup:
for tag in raw_markup[tagset]:
if tagset == "Preconditions":
self.preconditions.add(Precondition(tag=tag))
elif tagset == "ViolationConditions":
self.conditional_violations.add(ConditionalViolation(tag=tag))
elif tagset == "Propositions":
self.propositions.add(tag)
# Acts, goals, obligations, and topics are reified as objects during a conversation, but
# here are only represented as a tag
elif tagset == "Moves":
self.moves.add(tag)
elif tagset == "PushObligation": # Obligations pushed onto interlocutor
self.interlocutor_obligations_pushed.add(tag)
elif tagset == "PushSpeakerObligation": # Obligations pushed onto speaker (by their own line)
self.speaker_obligations_pushed.add(tag)
elif tagset == "PushTopic":
self.topics_pushed.add(tag)
elif tagset == "AddressTopic":
self.topics_addressed.add(tag)
elif tagset == "Context":
pass
elif tagset == "EffectConditions":
pass # TODO
elif tagset == "ChangeSubjectTo":
pass # TODO REMOVE THIS TAGSET
elif tagset == "UserStudyQueryArguments":
pass # This one is currently for LSTM training only
else:
raise Exception('Unknown tagset encountered: {}'.format(tagset))
@property
def all_markup(self):
"""Return all the annotations attributed to this symbol."""
all_markup = (
self.preconditions | self.conditional_violations | self.propositions |
{self.change_subject_to} | self.moves | self.speaker_obligations_pushed |
self.interlocutor_obligations_pushed | self.topics_pushed | self.topics_addressed
)
return list(all_markup)
def conversational_violations(self, conversation):
"""Return a list of names of conversational violations that will be incurred if this line is deployed now."""
violations_incurred = [
potential_violation.name for potential_violation in self.conditional_violations if
potential_violation.evaluate(state=conversation)
]
return violations_incurred
class LineOfDialogue(object):
"""A line of dialogue that may be used during a conversation."""
def __init__(self, utterance_that_the_player_composed, predicted_trace, conversation):
"""Initialize a LineOfDialogue object.
@param utterance_that_the_player_composed: The very string that the player submitted as her utterance.
@param predicted_trace: The output of our LSTM RNN, given line_that_player_composed as input.
More specifically, this is a predicted Expressionist trace that could
have generated the utterance that the player submitted.
@param conversation: The Conversation object representing the conversation that this line of
dialogue will be deployed in. This object holds all the conversation state
that we need to access to recognize state-dependent conversational violations,
etc., that may be attributed to this line. It will also provide access to
an Impressionist object, which is necessary to furnish the NonterminalSymbol
objects that will be associated with this object (and will be utilized to
attribute all its symbolic markup).
"""
self.body = utterance_that_the_player_composed
self.predicted_trace = predicted_trace
self.symbols = self._init_parse_predicted_trace(impressionist=conversation.impressionist)
# Prepare annotation attributes
self.conversational_violations = set()
self.propositions = set()
self.change_subject_to = '' # Raw Python snippet that when executed will change the subject of conversation
self.moves = set() # The dialogue moves constituted by the delivery of this line
self.speaker_obligations_pushed = set() # Line asserts speaker conversational obligations
self.interlocutor_obligations_pushed = set() # Line asserts interlocutor conversational obligations
self.topics_pushed = set() # Line introduces a new topic of conversation
self.topics_addressed = set() # Line addresses a topic of conversation
self._init_inherit_markup(conversation=conversation)
def __str__(self):
"""Return the raw template characterizing this line of dialogue."""
return self.body
def _init_parse_predicted_trace(self, impressionist):
"""Parse our RNN's predicted Expressionist trace for this line to produce a set of nonterminal
symbols that will be associated to it (which we will use as keys to access the symbolic mark-up
that will be attributed to the line so that the NPC who hears it can actually understand it).
This method currently assumed traces that are formatted like this one:
greet { greet|back { greet|back|immediately { greeting|word { } ^ expressive|punct {
expressive|punct>extroverted { } } } } }
"""
# Parse out the symbol references themselves
symbol_references = {
symbol_ref for symbol_ref in self.predicted_trace.split(' ') if
'{' not in symbol_ref and '}' not in symbol_ref and '^' not in symbol_ref
}
# Reformat the symbol references (which will be formatted like, e.g., 'greet|back|immediately')
# to match the formatting they will have in the Impressionist object's 'nonterminal_symbols'
# listing (e.g., 'greet back immediately')
symbol_references = {' '.join(symbol_ref.split('|')) for symbol_ref in symbol_references}
# Ask an Impressionist object for a list of the actual NonterminalSymbols corresponding to these
# symbol references (which hold all the symbolic mark-up that we're after)
symbol_objects = impressionist.furnish_nonterminal_symbols(symbol_references=symbol_references)
# Return this list of symbol objects
return symbol_objects
def _init_inherit_markup(self, conversation):
"""Inherit the mark-up of all the symbols that were expanded in the construction of this dialogue template."""
assert len({s.change_subject_to for s in self.symbols if s.change_subject_to}) < 2, (
"Line of dialogue {} is inheriting from symbols with contradicting 'change_subject_to' annotations."
)
for symbol in self.symbols:
self.conversational_violations |= set(symbol.conversational_violations(conversation=conversation))
self.propositions |= symbol.propositions
self.change_subject_to = symbol.change_subject_to if symbol.change_subject_to else self.change_subject_to
self.moves |= symbol.moves
self.speaker_obligations_pushed |= symbol.speaker_obligations_pushed
self.interlocutor_obligations_pushed |= symbol.interlocutor_obligations_pushed
self.topics_pushed |= symbol.topics_pushed
self.topics_addressed |= symbol.topics_addressed
def realize(self, conversation_turn=None):
"""Return the very line that the player composed.
Note: This method exists to maintain an equivalent interface to objects of the
classes Productionist.LineOfDialogue and Impressionist.LineOfDialogue, so that
Conversation objects can call LineOfDialogue.realize() without needing to know
which type of LineOfDialogue object is being referenced. This is why the method
takes the argument 'conversation_turn'.
"""
return self.body