@@ -102,6 +102,7 @@ def probe(self, generator) -> List[garak.attempt.Attempt]:
102102 keep_going = True
103103
104104 last_response = None
105+ new_conversation = True
105106 last_attempt = None
106107
107108 if not output_is_conversation :
@@ -116,16 +117,22 @@ def probe(self, generator) -> List[garak.attempt.Attempt]:
116117 f" turn { t .n :02} : red teaming [{ red_team_model_short [:10 ]:<10} ]"
117118 )
118119 # generate a challenge
119- if last_response == "" and self .constructive_tension :
120+ if (
121+ last_response == ""
122+ and self .constructive_tension
123+ and not new_conversation
124+ ):
120125 challenge = ""
126+
121127 else :
122- if last_response :
123- if self .use_only_first_sent :
124- last_response = last_response .split (". " )[
125- 0
126- ] # nltk sentence splitter install too unfriendly
127- else :
128- last_response = "" # maybe it's none or 0.0 or [] or goodness knows. let's get an empty string.
128+ if new_conversation :
129+ last_response = ""
130+ new_conversation = False
131+
132+ if self .use_only_first_sent :
133+ last_response = last_response .split (". " )[
134+ 0
135+ ] # nltk sentence splitter install too unfriendly
129136
130137 if len (self .red_team_prompt_template ):
131138 query = self .red_team_prompt_template .replace (
@@ -188,25 +195,34 @@ def probe(self, generator) -> List[garak.attempt.Attempt]:
188195 )
189196 # send the challenge in the target language and get the response
190197 response = generator .generate (this_attempt .prompt )
191- if response is None or len (response ) == 0 :
192- response_text = ""
198+ if (
199+ response is None
200+ or len (response ) == 0
201+ or response [0 ] is None
202+ or response [0 ].text is None
203+ ):
204+ response_text = None
193205 else :
194- response_text = (
195- response [0 ].text .strip () if response [0 ] is not None else ""
196- )
206+ response_text = response [0 ].text .strip ()
197207 this_attempt .outputs = response
198208
199209 # log the response
200- response_to_store = self .reverse_langprovider .get_text ([response_text ])[
201- 0
202- ]
210+ response_to_store = (
211+ self .reverse_langprovider .get_text ([response_text ])[0 ]
212+ if response_text is not None
213+ else None
214+ )
203215 if (
204216 self .reverse_langprovider .source_lang
205217 != self .reverse_langprovider .target_lang
206218 ):
207- this_attempt .reverse_translation_outputs = [
208- garak .attempt .Message (response_to_store )
209- ]
219+ # when the generator returns [None] and translation is active, store [None]
220+ if response and response [0 ] is None :
221+ this_attempt .reverse_translation_outputs = response
222+ else :
223+ this_attempt .reverse_translation_outputs = [
224+ garak .attempt .Message (response_to_store )
225+ ]
210226 logging .debug ("atkgen: model: %s" , response_text )
211227 if output_is_conversation :
212228 print (
@@ -218,12 +234,26 @@ def probe(self, generator) -> List[garak.attempt.Attempt]:
218234 calls_made += 1
219235 # last_response needs to be in the attack model's language, so base the update on `response_to_store`
220236 # check if the resp is empty or if it matches the previous resp
221- if not len (response_to_store ) and not self .constructive_tension :
237+ if (
238+ response_to_store
239+ and not len (response_to_store )
240+ and not self .constructive_tension
241+ ):
222242 keep_going = False
223- if response_to_store == last_response :
243+ # if response_to_store is None it will be coerced to "" on iteration hence the extra check
244+ if (response_to_store == last_response ) or (
245+ len (last_response ) == 0 and not response_to_store
246+ ):
224247 keep_going = False and not self .allow_repetition
225248 # update last_response
226- last_response = response_to_store .replace ("\n " , " " ).strip ()
249+ last_response = (
250+ response_to_store .replace ("\n " , " " ).strip ()
251+ if response_to_store
252+ else None
253+ )
254+
255+ if last_response is None :
256+ keep_going = False
227257
228258 _config .transient .reportfile .write (
229259 json .dumps (this_attempt .as_dict ()) + "\n "
0 commit comments