Skip to content

Commit 545bbf6

Browse files
authored
Fix the forwarding of the length penalty parameter (IBM#70)
It was erroneously tied to the presence of the repetition penalty parameter. So when the repetition penalty wasn't set, the server would not get this parameter even though the client sent it to the router. --------- Signed-off-by: Max de Bayser <[email protected]>
1 parent 6175107 commit 545bbf6

File tree

3 files changed

+40
-4
lines changed

3 files changed

+40
-4
lines changed

integration_tests/test_cases_bloom560m.yaml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,7 @@
12541254

12551255

12561256
# Length penalty
1257-
- name: Length penalty
1257+
- name: Length penalty with repetition penalty
12581258
request:
12591259
params:
12601260
decoding:
@@ -1273,6 +1273,24 @@
12731273
stopReason: EOS_TOKEN
12741274
text: The first time I saw the movie, it was in
12751275

1276+
# Length penalty
1277+
- name: Length penalty
1278+
request:
1279+
params:
1280+
decoding:
1281+
length_penalty:
1282+
start_index: 8
1283+
decay_factor: 1.01
1284+
stopping:
1285+
maxNewTokens: 20
1286+
requests:
1287+
- {"text": "A very long story:\n"}
1288+
response:
1289+
responses:
1290+
- generatedTokenCount: 12
1291+
inputTokenCount: 6
1292+
stopReason: EOS_TOKEN
1293+
text: The first time I saw the movie, I was a
12761294

12771295
# Multiple inputs
12781296
- name: Multiple inputs

integration_tests/test_cases_mt0small.yaml

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1166,7 +1166,7 @@
11661166

11671167

11681168
# Length penalty
1169-
- name: Length penalty
1169+
- name: Length penalty with repetition penalty
11701170
request:
11711171
params:
11721172
decoding:
@@ -1186,6 +1186,24 @@
11861186
text: The very long story is
11871187

11881188

1189+
# Length penalty
1190+
- name: Length penalty
1191+
request:
1192+
params:
1193+
decoding:
1194+
length_penalty:
1195+
start_index: 3
1196+
decay_factor: 4.0
1197+
stopping:
1198+
maxNewTokens: 20
1199+
requests:
1200+
- {"text": "A very long story:\n"}
1201+
response:
1202+
responses:
1203+
- generatedTokenCount: 7
1204+
inputTokenCount: 8
1205+
stopReason: EOS_TOKEN
1206+
text: The very long story is
11891207

11901208
# Multiple inputs
11911209
- name: Multiple inputs

router/src/grpc_server.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,9 +545,9 @@ fn convert_params(
545545
if let Some(d) = p.decoding {
546546
if d.repetition_penalty != 0.0 {
547547
gp.repetition_penalty = d.repetition_penalty;
548-
gp.length_penalty =
549-
d.length_penalty.map(|lp| (lp.start_index, lp.decay_factor));
550548
}
549+
gp.length_penalty =
550+
d.length_penalty.map(|lp| (lp.start_index, lp.decay_factor));
551551
}
552552
// Stopping Criteria
553553
if let Some(s) = p.stopping {

0 commit comments

Comments
 (0)