Skip to content

Commit 91a9072

Browse files
authored
Add return_offsets and truncate_input_tokens tokenize API options (#57)
By popular request. Signed-off-by: Nick Hill <[email protected]>
1 parent dacfe50 commit 91a9072

File tree

3 files changed

+138
-12
lines changed

3 files changed

+138
-12
lines changed

integration_tests/test_cases_mt0small.yaml

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,100 @@
5050
- </s>
5151

5252

53+
# Tokenize with offsets
54+
- name: Tokenize with offsets
55+
request_type: tokenize
56+
request:
57+
return_offsets: true
58+
requests:
59+
- {"text": "The very long story is written"}
60+
response:
61+
responses:
62+
- offsets:
63+
- end: 3
64+
- end: 4
65+
start: 3
66+
- end: 8
67+
start: 4
68+
- end: 13
69+
start: 8
70+
- end: 19
71+
start: 13
72+
- end: 22
73+
start: 19
74+
- end: 23
75+
start: 22
76+
- end: 30
77+
start: 23
78+
- {}
79+
tokenCount: 9
80+
81+
82+
# Tokenize with tokens and offsets
83+
- name: Tokenize with tokens and offsets
84+
request_type: tokenize
85+
request:
86+
return_tokens: true
87+
return_offsets: true
88+
requests:
89+
- { "text": "The very long story is written" }
90+
response:
91+
responses:
92+
- offsets:
93+
- end: 3
94+
- end: 4
95+
start: 3
96+
- end: 8
97+
start: 4
98+
- end: 13
99+
start: 8
100+
- end: 19
101+
start: 13
102+
- end: 22
103+
start: 19
104+
- end: 23
105+
start: 22
106+
- end: 30
107+
start: 23
108+
- {}
109+
tokenCount: 9
110+
tokens:
111+
- "\u2581The"
112+
- "\u2581"
113+
- very
114+
- "\u2581long"
115+
- "\u2581story"
116+
- "\u2581is"
117+
- "\u2581"
118+
- written
119+
- </s>
120+
121+
122+
# Tokenize with truncate
123+
- name: Tokenize with tokens and truncation
124+
request_type: tokenize
125+
request:
126+
return_tokens: true
127+
truncate_input_tokens: 10
128+
requests:
129+
- {"text": "The very long story is written by a very long story"}
130+
response:
131+
responses:
132+
- tokenCount: 10
133+
# Truncation happens on the left
134+
tokens:
135+
- "\u2581"
136+
- written
137+
- "\u2581by"
138+
- "\u2581"
139+
- a
140+
- "\u2581"
141+
- very
142+
- "\u2581long"
143+
- "\u2581story"
144+
- </s>
145+
146+
53147
# Basic Greedy (implicit)
54148
- name: Basic Greedy, max new tokens (implicit)
55149
request:

proto/generation.proto

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,11 @@ message TokenInfo {
197197
message BatchedTokenizeRequest {
198198
string model_id = 1;
199199
repeated TokenizeRequest requests = 2;
200-
bool return_tokens = 3; //TBD
200+
bool return_tokens = 3;
201+
bool return_offsets = 4;
202+
203+
// Zero means don't truncate.
204+
uint32 truncate_input_tokens = 5;
201205
}
202206

203207
message BatchedTokenizeResponse {
@@ -209,10 +213,17 @@ message TokenizeRequest {
209213
}
210214

211215
message TokenizeResponse {
216+
message Offset {
217+
uint32 start = 1;
218+
uint32 end = 2;
219+
}
220+
212221
uint32 token_count = 1;
213-
repeated string tokens = 2; // if include_tokens = true
214222

215-
// We'll possibly add more later
223+
// if return_tokens = true
224+
repeated string tokens = 2;
225+
// if return_offsets = true
226+
repeated Offset offsets = 3;
216227
}
217228

218229

router/src/grpc_server.rs

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use crate::{
3131
validation::{RequestSize, ValidationError},
3232
GenerateParameters, GenerateRequest,
3333
};
34+
use crate::pb::fmaas::tokenize_response::Offset;
3435

3536
/// Whether to fail if sampling parameters are provided in greedy-mode requests
3637
/// or to silently ignore them.
@@ -339,17 +340,37 @@ impl GenerationService for GenerationServicer {
339340
let br = request.into_inner();
340341
metrics::increment_counter!("tgi_tokenize_request_count");
341342
let start_time = Instant::now();
342-
self.tokenize_input_counter
343-
.increment(br.requests.len() as u64);
344-
345-
let responses = try_join_all(br.requests.into_iter().map(|tr| {
346-
self.tokenizer.tokenize(tr.text, br.return_tokens).map_ok(
347-
|(_, token_count, encoding)| TokenizeResponse {
348-
token_count: token_count as u32,
349-
tokens: encoding.map_or_else(Vec::new, |e| e.get_tokens().to_vec()),
343+
self.tokenize_input_counter.increment(br.requests.len() as u64);
344+
345+
let truncate_to = match br.truncate_input_tokens {
346+
0 => u32::MAX,
347+
length => length,
348+
};
349+
let include_encoding = br.return_tokens || br.return_offsets;
350+
let responses = try_join_all(br.requests.into_iter().map(|tr|
351+
self.tokenizer.tokenize(tr.text, include_encoding).map_ok(
352+
|(_, token_count, encoding)| {
353+
let token_count = token_count as u32;
354+
let from = token_count.saturating_sub(truncate_to) as usize;
355+
TokenizeResponse {
356+
token_count: token_count.min(truncate_to),
357+
tokens: match br.return_tokens {
358+
true => encoding.as_ref().unwrap().get_tokens()[from..].to_vec(),
359+
false => vec![],
360+
},
361+
offsets: match br.return_offsets {
362+
true => encoding.unwrap().get_offsets()[from..].iter().map(
363+
|(start, end)| Offset{
364+
start: *start as u32,
365+
end: *end as u32,
366+
}
367+
).collect(),
368+
false => vec![],
369+
},
370+
}
350371
},
351372
)
352-
}))
373+
))
353374
.map_err(Status::from_error)
354375
.await?;
355376

0 commit comments

Comments
 (0)