import unittest

import numpy as np
import paddle

from fastdeploy.model_executor.ops.gpu import top_p_candidates


def top_p_candidates_dynamic_top_p(probs, top_p_per_bid, candidates_len, output_padding_offset, max_seq_len):
    """
    Simulate TopPCandidates, selecting the Top-P value dynamically per logical block (bid).

    Args:
        probs: numpy.ndarray, shape [token_num, vocab_size]
            Probability distribution over the vocabulary for each token.
        top_p_per_bid: list or numpy.ndarray, shape [num_bid]
            Top-P value for each logical block (bid), e.g., [0.7, 0.9, 0.5].
        candidates_len: int
            Maximum number of candidate tokens to return for each token.
        output_padding_offset: numpy.ndarray, shape [token_num]
            Offset for each token, used to compute the original token ID (ori_token_id).
        max_seq_len: int
            Used to compute bid = ori_token_id // max_seq_len.

    Returns:
        verify_scores: List[List[float]], candidate token probability scores for each token.
        verify_tokens: List[List[int]], candidate token IDs for each token.
        actual_candidate_lens: List[int], actual number of candidate tokens returned for each token.
        ori_token_ids: List[int], original token ID for each token.
        bid_list: List[int], bid for each token.
    """
    token_num, vocab_size = probs.shape
    verify_tokens = []
    verify_scores = []
    actual_candidate_lens = []
    ori_token_ids = []
    bid_list = []

    top_p_per_bid = np.array(top_p_per_bid)
    num_bid = len(top_p_per_bid)

    for token_id in range(token_num):
        # --- Compute ori_token_id and bid ---
        offset = output_padding_offset[token_id]
        ori_token_id = token_id + offset
        bid = ori_token_id // max_seq_len

        # If bid falls outside the range of top_p_per_bid, clamp it to
        # [0, num_bid - 1]; raising an error would also be reasonable, but here
        # we simply fall back to the nearest valid bid.
        if bid < 0:
            bid = 0
        if bid >= num_bid:
            bid = num_bid - 1

        token_top_p = top_p_per_bid[bid]  # Dynamically retrieve the top_p value for the given bid.

        ori_token_ids.append(ori_token_id)
        bid_list.append(bid)

        # The probability distribution of the current token.
        token_probs = probs[token_id, :]
        # Sort by probability in descending order.
        sorted_indices = np.argsort(token_probs)[::-1]
        sorted_probs = token_probs[sorted_indices]

        accumulated_prob = 0.0
        selected_indices = []
        selected_probs = []

        for sort_idx, (prob, token_idx) in enumerate(zip(sorted_probs, sorted_indices)):
            if sort_idx >= candidates_len:
                break  # Return at most candidates_len.

            accumulated_prob += prob
            selected_indices.append(int(token_idx))
            selected_probs.append(float(prob))

            if accumulated_prob >= token_top_p:
                break  # The cumulative probability satisfies the Top-P criterion.

        # Even if the Top-P threshold was not reached, return the tokens selected so far.
        actual_len = len(selected_indices)
        actual_candidate_lens.append(actual_len)
        # Pad the remaining token IDs with 0.
        padded_token_ids = selected_indices + [0] * (candidates_len - actual_len)
        # Pad the remaining scores with 0.0.
        padded_scores = selected_probs + [0.0] * (candidates_len - actual_len)

        verify_tokens.append(padded_token_ids)
        verify_scores.append(padded_scores)

    return verify_scores, verify_tokens, actual_candidate_lens, ori_token_ids, bid_list


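# A minimal usage sketch of the reference above (hypothetical values, not used
# by the unit test): three tokens from three different sequences, a vocabulary
# of 4, and a distinct Top-P threshold per bid.
def _reference_usage_example():
    probs = np.array(
        [
            [0.1, 0.2, 0.3, 0.4],
            [0.25, 0.25, 0.25, 0.25],
            [0.7, 0.1, 0.1, 0.1],
        ]
    )
    # One token per sequence with max_seq_len = 4: offsets 0, 3, 6 map the
    # three tokens to bids 0, 1, 2 via ori_token_id // max_seq_len.
    output_padding_offset = np.array([0, 3, 6])
    scores, tokens, lens, ori_ids, bids = top_p_candidates_dynamic_top_p(
        probs, [0.5, 0.9, 0.6], 3, output_padding_offset, 4
    )
    # Token 0 (top_p = 0.5): sorted probs are 0.4, 0.3, ... and 0.4 + 0.3 >= 0.5,
    # so two candidates [3, 2] are kept, padded to [3, 2, 0] with scores
    # [0.4, 0.3, 0.0]; lens[0] == 2.
    return scores, tokens, lens, ori_ids, bids

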
def top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len):
    # Run the pure-numpy reference on host copies of the paddle tensors and
    # return only the first three outputs, which are the ones the test
    # compares against the GPU op.
    ret = top_p_candidates_dynamic_top_p(
        probs.numpy(), top_p.numpy(), candidates_len, output_padding_offset.numpy(), max_seq_len
    )
    return [ret[0], ret[1], ret[2]]


class TestTopPCandidates(unittest.TestCase):
    def test_top_p_candidates(self):
        paddle.seed(42)
        token_num = 5
        vocab_size = 100
        candidates_len = 5
        max_seq_len = 120
        # Normalize to a proper probability distribution per token; raw
        # paddle.randn output would contain negative "probabilities".
        probs = paddle.nn.functional.softmax(paddle.randn([token_num, vocab_size]), axis=-1)
        # One Top-P threshold in [0, 1) per sequence (token_num == bs below).
        top_p = paddle.rand([token_num])
        bs = 5
        tokens = [1] * bs  # Each sequence contributes exactly one token.
        output_padding_offset = []
        opo_offset = 0
        # For each token, record the cumulative padding of the sequences before
        # it; a sequence of ts tokens in a slot of max_seq_len contributes
        # max_seq_len - ts padding positions.
        for bid in range(bs):
            ts = tokens[bid]
            for i in range(ts):
                output_padding_offset.append(opo_offset)
            opo_offset += max_seq_len - ts
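        # With tokens = [1] * 5 and max_seq_len = 120, this yields
        # output_padding_offset = [0, 119, 238, 357, 476], so in the reference
        # ori_token_id = [0, 120, 240, 360, 480] and bid = [0, 1, 2, 3, 4].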
        output_padding_offset = paddle.to_tensor(output_padding_offset).astype(paddle.int32)
        ret1 = top_p_candidates(probs, top_p, output_padding_offset, candidates_len, max_seq_len)
        ret2 = top_p_candidates_ref(probs, top_p, output_padding_offset, candidates_len, max_seq_len)
        np.testing.assert_allclose(ret1[0].numpy(), ret2[0])
        np.testing.assert_allclose(ret1[1].numpy(), ret2[1])
        np.testing.assert_allclose(ret1[2].numpy(), ret2[2])


if __name__ == "__main__":
    unittest.main()