Skip to content

Commit af289f5

Browse files
sayanshaw24 and Sayan Shaw authored
avoid always skipping special tokens when decoding (#983)
Co-authored-by: Sayan Shaw <[email protected]>
1 parent 492286d commit af289f5

File tree

1 file changed

+9
-3
lines changed

1 file changed

+9
-3
lines changed

operators/tokenizer/bpe_streaming.hpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -52,11 +52,17 @@ class BpeStreamingDecoder : public KernelBpeDecoder {
5252
bool skip_special_tokens,
5353
bool& f_special_last) const {
5454
bool f_special = all_special_ids_.count(id) ? true : false;
55-
if (skip_special_tokens && f_special) {
56-
f_special_last = f_special;
57-
return {};
55+
if (!(added_tokens_.count(200005) && added_tokens_.at(200005) == "<|channel|>")){
56+
// We do not skip special tokens when decoding IDs for channel-based models as
57+
// they may be relevant to the output.
58+
59+
if (skip_special_tokens && f_special) {
60+
f_special_last = f_special;
61+
return {};
62+
}
5863
}
5964

65+
// Regular decoding logic
6066
if (added_tokens_.count(id)) {
6167
const std::string ws = added_tokens_.at(id);
6268
token = (std::string)ws;

0 commit comments

Comments
 (0)