Skip to content

Commit c5671d7

Browse files
authored
[MTP][Unit Test]add test_top_p_candidates (#4046)
* add test_top_p_candidates * fix * fix * fix
1 parent 918ccdb commit c5671d7

File tree

1 file changed

+129
-0
lines changed

1 file changed

+129
-0
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import unittest
2+
3+
import numpy as np
4+
import paddle
5+
6+
from fastdeploy.model_executor.ops.gpu import top_p_candidates
7+
8+
9+
def top_p_candidates_dynamic_top_p(probs, top_p_per_bid, candidates_len, output_padding_offset, max_seq_len):
    """
    Pure-Python/numpy reference for TopPCandidates with a per-bid Top-P threshold.

    Each token's batch index (bid) is derived from its original position:
    ``ori_token_id = token_id + output_padding_offset[token_id]`` and
    ``bid = ori_token_id // max_seq_len``; the bid selects the Top-P value.

    Args:
        probs: numpy.ndarray of shape [token_num, vocab_size] — per-token
            probability distribution over the vocabulary.
        top_p_per_bid: array-like of shape [num_bid] — Top-P threshold for
            each logical block (bid), e.g. [0.7, 0.9, 0.5].
        candidates_len: int — maximum number of candidates kept per token.
        output_padding_offset: numpy.ndarray of shape [token_num] — offset
            added to the token index to recover its original token id.
        max_seq_len: int — sequence stride used to map token ids to bids.

    Returns:
        Tuple of five lists, one entry per token:
        (verify_scores, verify_tokens, actual_candidate_lens, ori_token_ids, bid_list)
        where tokens/scores are zero-padded out to ``candidates_len``.
    """
    num_tokens = probs.shape[0]
    p_per_bid = np.array(top_p_per_bid)
    n_bids = len(p_per_bid)

    all_scores = []
    all_tokens = []
    real_lens = []
    ori_ids = []
    bids = []

    for tid in range(num_tokens):
        # Recover the original token id and its logical block index.
        ori_id = tid + output_padding_offset[tid]
        bid = ori_id // max_seq_len

        # Out-of-range bids are clamped into [0, n_bids - 1] rather than
        # raising; the last bid absorbs any overflow.
        if bid < 0:
            bid = 0
        if bid >= n_bids:
            bid = n_bids - 1

        threshold = p_per_bid[bid]  # per-bid Top-P threshold
        ori_ids.append(ori_id)
        bids.append(bid)

        # Sort this token's vocabulary probabilities in descending order.
        row = probs[tid, :]
        order = np.argsort(row)[::-1]
        desc = row[order]

        cum = 0.0
        kept_ids = []
        kept_ps = []
        # Keep at most candidates_len entries; stop early once the
        # cumulative probability reaches the Top-P threshold.
        for rank in range(min(candidates_len, len(order))):
            cum += desc[rank]
            kept_ids.append(int(order[rank]))
            kept_ps.append(float(desc[rank]))
            if cum >= threshold:
                break

        # If the threshold was never reached we still return what we kept.
        k = len(kept_ids)
        real_lens.append(k)
        pad = candidates_len - k
        all_tokens.append(kept_ids + [0] * pad)      # pad token ids with 0
        all_scores.append(kept_ps + [0.0] * pad)     # pad scores with 0.0

    return all_scores, all_tokens, real_lens, ori_ids, bids
95+
96+
97+
def top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len):
    """
    Reference wrapper mirroring the GPU op's signature and outputs.

    Delegates to ``top_p_candidates_dynamic_top_p`` (note the different
    argument order) and keeps only the three outputs the GPU op returns:
    [verify_scores, verify_tokens, actual_candidate_lens].
    """
    scores, tokens, lens, _ori_ids, _bids = top_p_candidates_dynamic_top_p(
        probs, top_p, candidates_len, output_padding_offset, max_seq_len
    )
    return [scores, tokens, lens]
100+
101+
102+
class TestTopPCandidates(unittest.TestCase):
    """Compare the GPU ``top_p_candidates`` op against the Python reference."""

    def test_top_p_candidates(self):
        paddle.seed(42)
        token_num = 5
        vocab_size = 100
        candidates_len = 5
        max_seq_len = 120
        probs = paddle.randn([token_num, vocab_size])
        top_p = paddle.randn([token_num])

        # One token per batch entry; build the cumulative padding offset
        # so token i maps back to position i within batch entry i.
        bs = 5
        tokens_per_bid = [1] * bs
        offsets = []
        acc = 0
        for n_tok in tokens_per_bid:
            offsets.extend([acc] * n_tok)
            acc += max_seq_len - n_tok
        output_padding_offset = paddle.to_tensor(offsets).astype(paddle.int32)

        gpu_out = top_p_candidates(probs, top_p, output_padding_offset, candidates_len, max_seq_len)
        ref_out = top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len)

        # Check scores, token ids, and actual candidate counts, in that order.
        for got, want in zip(gpu_out[:3], ref_out):
            np.testing.assert_allclose(got.numpy(), want)
126+
127+
128+
# Allow running this test module directly: `python <this_file>.py`.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)