Skip to content

Commit 3f7a7ea

Browse files
committed
Batch AUC (#13567)
* add distributed auc * add attr "is distributed" and config it * add distributed auc * add batch auc and code format * code format * auc optimize * metric_op optimize * code clean * bug fix and code clean * bug fix and code clean * code optimize * code optimize * api spec update * Comments optimized * add mutex * Revert: add mutex * remove distribute metric * remove distribute metric * spec modified * add annotation, test=develop * keep API compatibility test=develop
1 parent 644bad1 commit 3f7a7ea

File tree

6 files changed

+163
-61
lines changed

6 files changed

+163
-61
lines changed

paddle/fluid/API.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=[], varargs='args', keywords='kw
286286
paddle.fluid.layers.box_coder ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
287287
paddle.fluid.layers.polygon_box_transform ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
288288
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
289-
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk'], varargs=None, keywords=None, defaults=('ROC', 4095, 1))
289+
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
290290
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
291291
paddle.fluid.layers.natural_exp_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))
292292
paddle.fluid.layers.inverse_time_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))

paddle/fluid/operators/auc_op.cc

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,16 @@ class AucOp : public framework::OperatorWithKernel {
3636
"Out and Label should have same height.");
3737

3838
int num_pred_buckets = ctx->Attrs().Get<int>("num_thresholds") + 1;
39+
int slide_steps = ctx->Attrs().Get<int>("slide_steps");
40+
41+
PADDLE_ENFORCE_GE(num_pred_buckets, 1, "num_thresholds must larger than 1");
42+
PADDLE_ENFORCE_GE(slide_steps, 0, "slide_steps must be natural number");
3943

4044
ctx->SetOutputDim("AUC", {1});
41-
ctx->SetOutputDim("BatchAUC", {1});
42-
ctx->SetOutputDim("StatPosOut", {num_pred_buckets});
43-
ctx->SetOutputDim("StatNegOut", {num_pred_buckets});
45+
46+
slide_steps = slide_steps == 0 ? 1 : slide_steps;
47+
ctx->SetOutputDim("StatPosOut", {slide_steps, num_pred_buckets});
48+
ctx->SetOutputDim("StatNegOut", {slide_steps, num_pred_buckets});
4449
}
4550

4651
protected:
@@ -62,25 +67,27 @@ class AucOpMaker : public framework::OpProtoAndCheckerMaker {
6267
AddInput("Label",
6368
"A 2D int tensor indicating the label of the training data. "
6469
"shape: [batch_size, 1]");
70+
6571
// TODO(typhoonzero): support weight input
6672
AddInput("StatPos", "Statistic value when label = 1");
6773
AddInput("StatNeg", "Statistic value when label = 0");
6874

6975
AddOutput("AUC",
7076
"A scalar representing the "
7177
"current area-under-the-curve.");
72-
AddOutput("BatchAUC", "The AUC for current batch");
78+
7379
AddOutput("StatPosOut", "Statistic value when label = 1");
7480
AddOutput("StatNegOut", "Statistic value when label = 0");
7581

7682
AddAttr<std::string>("curve", "Curve type, can be 'ROC' or 'PR'.")
7783
.SetDefault("ROC");
7884

79-
AddAttr<int>("num_thresholds",
80-
"The number of thresholds to use when discretizing the"
81-
" roc curve.")
85+
AddAttr<int>(
86+
"num_thresholds",
87+
"The number of thresholds to use when discretizing the roc curve.")
8288
.SetDefault((2 << 12) - 1);
83-
89+
AddAttr<int>("slide_steps", "Use slide steps to calc batch auc.")
90+
.SetDefault(1);
8491
AddComment(R"DOC(
8592
Area Under The Curve (AUC) Operator.
8693

paddle/fluid/operators/auc_op.h

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,29 @@ class AucKernel : public framework::OpKernel<T> {
3232

3333
std::string curve = ctx.Attr<std::string>("curve");
3434
int num_thresholds = ctx.Attr<int>("num_thresholds");
35+
// buckets contain numbers from 0 to num_thresholds
3536
int num_pred_buckets = num_thresholds + 1;
37+
int slide_steps = ctx.Attr<int>("slide_steps");
3638

3739
// Only use output var for now, make sure it's persistable and
3840
// not cleaned up for each batch.
3941
auto *auc = ctx.Output<Tensor>("AUC");
4042
auto *stat_pos = ctx.Output<Tensor>("StatPosOut");
4143
auto *stat_neg = ctx.Output<Tensor>("StatNegOut");
4244

43-
auto *stat_pos_data = stat_pos->mutable_data<int64_t>(ctx.GetPlace());
44-
auto *stat_neg_data = stat_neg->mutable_data<int64_t>(ctx.GetPlace());
45-
calcAuc(ctx, label, predict, stat_pos_data, stat_neg_data, num_thresholds,
46-
auc);
45+
auto *origin_stat_pos = stat_pos->mutable_data<int64_t>(ctx.GetPlace());
46+
auto *origin_stat_neg = stat_neg->mutable_data<int64_t>(ctx.GetPlace());
4747

48-
auto *batch_auc = ctx.Output<Tensor>("BatchAUC");
49-
std::vector<int64_t> stat_pos_batch(num_pred_buckets, 0);
50-
std::vector<int64_t> stat_neg_batch(num_pred_buckets, 0);
51-
calcAuc(ctx, label, predict, stat_pos_batch.data(), stat_neg_batch.data(),
52-
num_thresholds, batch_auc);
48+
std::vector<int64_t> stat_pos_data(num_pred_buckets, 0);
49+
std::vector<int64_t> stat_neg_data(num_pred_buckets, 0);
50+
51+
auto stat_pos_calc = stat_pos_data.data();
52+
auto stat_neg_calc = stat_neg_data.data();
53+
54+
statAuc(label, predict, num_pred_buckets, num_thresholds, slide_steps,
55+
origin_stat_pos, origin_stat_neg, &stat_pos_calc, &stat_neg_calc);
56+
57+
calcAuc(ctx, stat_pos_calc, stat_neg_calc, num_thresholds, auc);
5358
}
5459

5560
private:
@@ -58,29 +63,76 @@ class AucKernel : public framework::OpKernel<T> {
5863
return (X1 > X2 ? (X1 - X2) : (X2 - X1)) * (Y1 + Y2) / 2.0;
5964
}
6065

61-
inline static void calcAuc(const framework::ExecutionContext &ctx,
62-
const framework::Tensor *label,
66+
inline static void statAuc(const framework::Tensor *label,
6367
const framework::Tensor *predict,
64-
int64_t *stat_pos, int64_t *stat_neg,
65-
int num_thresholds,
66-
framework::Tensor *auc_tensor) {
68+
const int num_pred_buckets,
69+
const int num_thresholds, const int slide_steps,
70+
int64_t *origin_stat_pos, int64_t *origin_stat_neg,
71+
int64_t **stat_pos, int64_t **stat_neg) {
6772
size_t batch_size = predict->dims()[0];
6873
size_t inference_width = predict->dims()[1];
6974
const T *inference_data = predict->data<T>();
7075
const auto *label_data = label->data<int64_t>();
7176

72-
auto *auc = auc_tensor->mutable_data<double>(ctx.GetPlace());
73-
7477
for (size_t i = 0; i < batch_size; i++) {
7578
uint32_t binIdx = static_cast<uint32_t>(
7679
inference_data[i * inference_width + 1] * num_thresholds);
7780
if (label_data[i]) {
78-
stat_pos[binIdx] += 1.0;
81+
(*stat_pos)[binIdx] += 1.0;
7982
} else {
80-
stat_neg[binIdx] += 1.0;
83+
(*stat_neg)[binIdx] += 1.0;
8184
}
8285
}
8386

87+
int bucket_length = num_pred_buckets * sizeof(int64_t);
88+
89+
// slide_steps == 0: no sliding window; accumulate the statistics over all steps.
90+
if (slide_steps == 0) {
91+
for (int slide = 0; slide < num_pred_buckets; ++slide) {
92+
origin_stat_pos[slide] += (*stat_pos)[slide];
93+
origin_stat_neg[slide] += (*stat_neg)[slide];
94+
}
95+
96+
*stat_pos = origin_stat_pos;
97+
*stat_neg = origin_stat_neg;
98+
99+
} else {
100+
for (int slide = 1; slide < slide_steps; ++slide) {
101+
int dst_idx = (slide - 1) * num_pred_buckets;
102+
int src_inx = slide * num_pred_buckets;
103+
std::memcpy(origin_stat_pos + dst_idx, origin_stat_pos + src_inx,
104+
bucket_length);
105+
std::memcpy(origin_stat_neg + dst_idx, origin_stat_neg + src_inx,
106+
bucket_length);
107+
}
108+
109+
std::memcpy(origin_stat_pos + (slide_steps - 1) * num_pred_buckets,
110+
*stat_pos, bucket_length);
111+
std::memcpy(origin_stat_neg + (slide_steps - 1) * num_pred_buckets,
112+
*stat_neg, bucket_length);
113+
114+
std::memset(*stat_pos, 0, bucket_length);
115+
std::memset(*stat_neg, 0, bucket_length);
116+
117+
for (int slide = 0; slide < num_pred_buckets; ++slide) {
118+
int stat_pos_steps = 0;
119+
int stat_neg_steps = 0;
120+
for (int step = 0; step < slide_steps; ++step) {
121+
stat_pos_steps += origin_stat_pos[slide + step * num_pred_buckets];
122+
stat_neg_steps += origin_stat_neg[slide + step * num_pred_buckets];
123+
}
124+
(*stat_pos)[slide] += stat_pos_steps;
125+
(*stat_neg)[slide] += stat_neg_steps;
126+
}
127+
}
128+
}
129+
130+
inline static void calcAuc(const framework::ExecutionContext &ctx,
131+
int64_t *stat_pos, int64_t *stat_neg,
132+
int num_thresholds,
133+
framework::Tensor *auc_tensor) {
134+
auto *auc = auc_tensor->mutable_data<double>(ctx.GetPlace());
135+
84136
*auc = 0.0f;
85137

86138
double totPos = 0.0;
@@ -96,7 +148,6 @@ class AucKernel : public framework::OpKernel<T> {
96148
totPos += stat_pos[idx];
97149
totNeg += stat_neg[idx];
98150
*auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev);
99-
100151
--idx;
101152
}
102153

python/paddle/fluid/layers/metric_op.py

Lines changed: 51 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,12 @@ def accuracy(input, label, k=1, correct=None, total=None):
7878
return acc_out
7979

8080

81-
def auc(input, label, curve='ROC', num_thresholds=2**12 - 1, topk=1):
81+
def auc(input,
82+
label,
83+
curve='ROC',
84+
num_thresholds=2**12 - 1,
85+
topk=1,
86+
slide_steps=1):
8287
"""
8388
**Area Under the Curve (AUC) Layer**
8489
@@ -105,6 +110,8 @@ def auc(input, label, curve='ROC', num_thresholds=2**12 - 1, topk=1):
105110
num_thresholds(int): The number of thresholds to use when discretizing
106111
the roc curve. Default 4095.
107112
topk(int): only topk number of prediction output will be used for auc.
113+
slide_steps(int): the number of steps used to compute the batch AUC; besides the current step, statistics from previous steps can be included. slide_steps=1 uses only the current step, slide_steps=3 uses the current step and the two previous steps, and slide_steps=0 uses all steps. Default 1.
114+
108115
109116
Returns:
110117
Variable: A scalar representing the current AUC.
@@ -120,16 +127,48 @@ def auc(input, label, curve='ROC', num_thresholds=2**12 - 1, topk=1):
120127
auc_out = helper.create_tmp_variable(dtype="float64")
121128
batch_auc_out = helper.create_tmp_variable(dtype="float64")
122129
# make the stat_pos / stat_neg statistics persistable, so that they can accumulate across batches.
130+
131+
# for batch auc
132+
batch_stat_pos = helper.create_global_variable(
133+
persistable=True,
134+
dtype='int64',
135+
shape=[slide_steps, num_thresholds + 1])
136+
batch_stat_neg = helper.create_global_variable(
137+
persistable=True,
138+
dtype='int64',
139+
shape=[slide_steps, num_thresholds + 1])
140+
141+
# for global auc
123142
stat_pos = helper.create_global_variable(
124-
persistable=True, dtype='int64', shape=[num_thresholds + 1])
143+
persistable=True, dtype='int64', shape=[1, num_thresholds + 1])
125144
stat_neg = helper.create_global_variable(
126-
persistable=True, dtype='int64', shape=[num_thresholds + 1])
145+
persistable=True, dtype='int64', shape=[1, num_thresholds + 1])
127146

128-
for var in [stat_pos, stat_neg]:
147+
for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]:
129148
helper.set_variable_initializer(
130149
var, Constant(
131150
value=0.0, force_cpu=True))
132151

152+
# Batch AUC
153+
helper.append_op(
154+
type="auc",
155+
inputs={
156+
"Predict": [input],
157+
"Label": [label],
158+
"StatPos": [batch_stat_pos],
159+
"StatNeg": [batch_stat_neg]
160+
},
161+
attrs={
162+
"curve": curve,
163+
"num_thresholds": num_thresholds,
164+
"slide_steps": slide_steps
165+
},
166+
outputs={
167+
"AUC": [batch_auc_out],
168+
"StatPosOut": [batch_stat_pos],
169+
"StatNegOut": [batch_stat_neg]
170+
})
171+
# Global AUC
133172
helper.append_op(
134173
type="auc",
135174
inputs={
@@ -138,12 +177,16 @@ def auc(input, label, curve='ROC', num_thresholds=2**12 - 1, topk=1):
138177
"StatPos": [stat_pos],
139178
"StatNeg": [stat_neg]
140179
},
141-
attrs={"curve": curve,
142-
"num_thresholds": num_thresholds},
180+
attrs={
181+
"curve": curve,
182+
"num_thresholds": num_thresholds,
183+
"slide_steps": 0
184+
},
143185
outputs={
144186
"AUC": [auc_out],
145-
"BatchAUC": [batch_auc_out],
146187
"StatPosOut": [stat_pos],
147188
"StatNegOut": [stat_neg]
148189
})
149-
return auc_out, batch_auc_out, [stat_pos, stat_neg]
190+
return auc_out, batch_auc_out, [
191+
batch_stat_pos, batch_stat_neg, stat_pos, stat_neg
192+
]

python/paddle/fluid/tests/unittests/test_auc_op.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ def setUp(self):
3636
"StatPos": stat_pos,
3737
"StatNeg": stat_neg
3838
}
39-
self.attrs = {'curve': 'ROC', 'num_thresholds': num_thresholds}
39+
self.attrs = {
40+
'curve': 'ROC',
41+
'num_thresholds': num_thresholds,
42+
"slide_steps": 1
43+
}
4044

4145
python_auc = metrics.Auc(name="auc",
4246
curve='ROC',
@@ -45,7 +49,6 @@ def setUp(self):
4549

4650
self.outputs = {
4751
'AUC': np.array(python_auc.eval()),
48-
'BatchAUC': np.array(python_auc.eval()),
4952
'StatPosOut': np.array(python_auc._stat_pos),
5053
'StatNegOut': np.array(python_auc._stat_neg)
5154
}

0 commit comments

Comments
 (0)