Skip to content

Commit ee9832a

Browse files
authored
Add Top-k Python API. (#9973)
* Add topk Python API. * Add unit test. * Remove the repeated API.
1 parent e5b3eb9 commit ee9832a

File tree

6 files changed

+70
-66
lines changed

6 files changed

+70
-66
lines changed

doc/fluid/api/layers.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,3 +815,8 @@ zeros
815815
.. autofunction:: paddle.fluid.layers.zeros
816816
:noindex:
817817

818+
topk
819+
----
820+
821+
.. autofunction:: paddle.fluid.layers.topk
822+
:noindex:

paddle/fluid/operators/top_k_op.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ namespace paddle {
2424
namespace operators {
2525

2626
using Tensor = framework::Tensor;
27-
using LoDTensor = framework::LoDTensor;
2827

2928
template <typename T, int MajorType = Eigen::RowMajor,
3029
typename IndexType = Eigen::DenseIndex>
@@ -36,9 +35,9 @@ class TopkKernel : public framework::OpKernel<T> {
3635
void Compute(const framework::ExecutionContext& ctx) const override {
3736
// Get the top k elements of each row of input tensor
3837
// FIXME: only deal with matrix(2d tensor).
39-
auto* input = ctx.Input<LoDTensor>("X");
40-
auto* output = ctx.Output<LoDTensor>("Out");
41-
auto* indices = ctx.Output<LoDTensor>("Indices");
38+
auto* input = ctx.Input<Tensor>("X");
39+
auto* output = ctx.Output<Tensor>("Out");
40+
auto* indices = ctx.Output<Tensor>("Indices");
4241
// k is determined by Attr
4342
const size_t k = static_cast<int>(ctx.Attr<int>("k"));
4443

python/paddle/fluid/layers/control_flow.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
'Switch',
3333
'lod_rank_table',
3434
'max_sequence_len',
35-
'topk',
3635
'lod_tensor_to_array',
3736
'array_to_lod_tensor',
3837
'increment',
@@ -751,43 +750,6 @@ def max_sequence_len(rank_table):
751750
return res
752751

753752

754-
def topk(input, k):
755-
"""
756-
**topk**
757-
758-
This function performs the operation that selects the k entries in the input
759-
vector and outputs their values and indices as vectors. Thus topk_out[j] is
760-
the j-th largest entry in input, and its index is topk_indices[j]
761-
762-
Args:
763-
input (Variable|list): The input tensor that has all the data.
764-
k (int): The number of top elements that the function will pick.
765-
766-
Returns:
767-
Variable: The variable of type array that contains the k largest entries
768-
from input.
769-
Variable: The variable of type array that contains the indices of k
770-
largest entries from input.
771-
772-
Examples:
773-
.. code-block:: python
774-
775-
x = fluid.layers.data(name='x', shape=[10])
776-
k = 5
777-
array = fluid.layers.topk(x, k)
778-
"""
779-
helper = LayerHelper('topk', **locals())
780-
topk_out = helper.create_tmp_variable(dtype=input.dtype)
781-
topk_indices = helper.create_tmp_variable(dtype='int64')
782-
helper.append_op(
783-
type='top_k',
784-
inputs={'X': [input]},
785-
outputs={'Out': [topk_out],
786-
'Indices': [topk_indices]},
787-
attrs={'k': k})
788-
return topk_out, topk_indices
789-
790-
791753
def lod_tensor_to_array(x, table):
792754
""" Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY.
793755

python/paddle/fluid/layers/metric.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,18 @@
2020
from ..initializer import Normal, Constant
2121
from ..framework import Variable
2222
from ..param_attr import ParamAttr
23+
import nn
2324

2425
__all__ = ['accuracy', 'auc']
2526

2627

2728
def accuracy(input, label, k=1, correct=None, total=None):
2829
"""
2930
This function computes the accuracy using the input and label.
30-
The output is the top_k inputs and their indices.
31+
The output is the top k inputs and their indices.
3132
"""
3233
helper = LayerHelper("accuracy", **locals())
33-
topk_out = helper.create_tmp_variable(dtype=input.dtype)
34-
topk_indices = helper.create_tmp_variable(dtype="int64")
35-
helper.append_op(
36-
type="top_k",
37-
inputs={"X": [input]},
38-
outputs={"Out": [topk_out],
39-
"Indices": [topk_indices]},
40-
attrs={"k": k})
34+
topk_out, topk_indices = nn.topk(input, k=k)
4135
acc_out = helper.create_tmp_variable(dtype="float32")
4236
if correct is None:
4337
correct = helper.create_tmp_variable(dtype="int64")
@@ -68,12 +62,7 @@ def auc(input, label, curve='ROC', num_thresholds=200):
6862
helper = LayerHelper("auc", **locals())
6963
topk_out = helper.create_tmp_variable(dtype=input.dtype)
7064
topk_indices = helper.create_tmp_variable(dtype="int64")
71-
helper.append_op(
72-
type="top_k",
73-
inputs={"X": [input]},
74-
outputs={"Out": [topk_out],
75-
"Indices": [topk_indices]},
76-
attrs={"k": k})
65+
topk_out, topk_indices = nn.topk(input, k=k)
7766
auc_out = helper.create_tmp_variable(dtype="float32")
7867
if correct is None:
7968
correct = helper.create_tmp_variable(dtype="int64")

python/paddle/fluid/layers/nn.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
'edit_distance',
6161
'l2_normalize',
6262
'matmul',
63+
'topk',
6364
'warpctc',
6465
'sequence_reshape',
6566
'transpose',
@@ -2576,6 +2577,53 @@ def __check_input(x, y):
25762577
return out
25772578

25782579

2580+
def topk(input, k):
    """
    This operator is used to find values and indices of the k largest entries
    for the last dimension.

    If the input is a vector (rank=1), finds the k largest entries in the vector
    and outputs their values and indices as vectors. Thus values[j] is the j-th
    largest entry in input, and its index is indices[j].

    If the input is a Tensor with higher rank, this operator computes the top k
    entries along the last dimension.

    Args:
        input(Variable): The input variable which can be a vector or Tensor with
            higher rank.
        k(int): An integer value to specify the top k largest elements.

    Returns:
        values(Variable): The k largest elements along each last dimensional
            slice.
        indices(Variable): The indices of values within the last dimension of
            input.

    Raises:
        ValueError: If k is smaller than 1, or if the size of the last
            dimension of input is known and k is not smaller than it.

    Examples:
        .. code-block:: python

            top5_values, top5_indices = layers.topk(input, k=5)
    """
    shape = input.shape
    # The two range checks must be joined with `or`: with `and` the condition
    # can never hold for a positive last dimension, so an invalid k was
    # silently accepted.
    # NOTE(review): a negative last dimension presumably marks a size that is
    # unknown when the graph is built, so the upper bound is only checked when
    # the size is non-negative -- confirm against the framework's convention.
    # No extra local is introduced here on purpose: locals() is forwarded to
    # LayerHelper below and must keep containing exactly input, k and shape.
    if k < 1 or (shape[-1] >= 0 and k >= shape[-1]):
        raise ValueError("k must be greater than 0 and less than %d." %
                         (shape[-1]))

    helper = LayerHelper("top_k", **locals())
    values = helper.create_tmp_variable(dtype=input.dtype)
    indices = helper.create_tmp_variable(dtype="int64")
    helper.append_op(
        type="top_k",
        inputs={"X": [input]},
        outputs={"Out": [values],
                 "Indices": [indices]},
        attrs={"k": k})
    # Neither output takes part in back-propagation.
    values.stop_gradient = True
    indices.stop_gradient = True
    return values, indices
2625+
2626+
25792627
def edit_distance(input, label, normalized=True, ignored_tokens=None,
25802628
name=None):
25812629
"""
@@ -2717,15 +2765,7 @@ def ctc_greedy_decoder(input, blank, name=None):
27172765
cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
27182766
"""
27192767
helper = LayerHelper("ctc_greedy_decoder", **locals())
2720-
# top 1 op
2721-
topk_out = helper.create_tmp_variable(dtype=input.dtype)
2722-
topk_indices = helper.create_tmp_variable(dtype="int64")
2723-
helper.append_op(
2724-
type="top_k",
2725-
inputs={"X": [input]},
2726-
outputs={"Out": [topk_out],
2727-
"Indices": [topk_indices]},
2728-
attrs={"k": 1})
2768+
_, topk_indices = topk(input, k=1)
27292769

27302770
# ctc align op
27312771
ctc_out = helper.create_tmp_variable(dtype="int64")

python/paddle/fluid/tests/unittests/test_layers.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,15 @@ def test_label_smooth(self):
350350
self.assertIsNotNone(smooth_label)
351351
print(str(program))
352352

353+
def test_topk(self):
    """Build a program with a top-k layer and check both outputs exist."""
    prog = Program()
    with program_guard(prog):
        scores = layers.data(name="label", shape=[200], dtype="float32")
        top_values, top_indices = layers.topk(scores, k=5)
        # Both the selected values and their indices must be produced.
        self.assertIsNotNone(top_values)
        self.assertIsNotNone(top_indices)
    # Dump the generated program description for inspection.
    print(str(prog))
361+
353362

354363
if __name__ == '__main__':
355364
unittest.main()

0 commit comments

Comments
 (0)