Skip to content

Commit 71cbc8b

Browse files
committed
optimize code
1 parent 694e894 commit 71cbc8b

File tree

2 files changed

+37
-23
lines changed

2 files changed

+37
-23
lines changed

paddle/fluid/operators/reader/ctr_reader.cc

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,9 @@ class MultiGzipReader : public Reader {
122 122
size_t current_reader_index_ = 0;
123 123
};
124 124

125-
void CTRReader::ReadThread(const std::vector<std::string>& file_list,
126-
const std::vector<std::string>& slots,
127-
int batch_size,
128-
std::shared_ptr<LoDTensorBlockingQueue> queue) {
125+
void ReadThread(const std::vector<std::string>& file_list,
126+
const std::vector<std::string>& slots, int batch_size,
127+
std::shared_ptr<LoDTensorBlockingQueue> queue) {
129 128
std::string line;
130 129

131 130
std::vector<std::unordered_map<std::string, std::vector<int64_t>>> batch_data;

paddle/fluid/operators/reader/ctr_reader.h

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,23 @@ namespace paddle {
30 30
namespace operators {
31 31
namespace reader {
32 32

33+
void ReadThread(const std::vector<std::string>& file_list,
34+
const std::vector<std::string>& slots, int batch_size,
35+
std::shared_ptr<LoDTensorBlockingQueue> queue);
36+
33 37
class CTRReader : public framework::FileReader {
34 38
public:
35 39
explicit CTRReader(const std::shared_ptr<LoDTensorBlockingQueue>& queue,
36 40
int batch_size, int thread_num,
37 41
const std::vector<std::string>& slots,
38 42
const std::vector<std::string>& file_list)
39-
: framework::FileReader() {
40-
thread_num_ = thread_num;
41-
batch_size_ = batch_size;
43+
: thread_num_(thread_num),
44+
batch_size_(batch_size),
45+
slots_(slots),
46+
file_list_(file_list) {
42 47
PADDLE_ENFORCE(queue != nullptr, "LoDTensorBlockingQueue must not be null");
43 48
queue_ = queue;
44-
slots_ = slots;
45-
file_list_ = file_list;
49+
SplitFiles();
46 50
}
47 51

48 52
~CTRReader() { queue_->Close(); }
@@ -53,30 +57,41 @@ class CTRReader : public framework::FileReader {
53 57
if (!success) out->clear();
54 58
}
55 59

56-
void Shutdown() override { queue_->Close(); }
60+
void Shutdown() override {
61+
VLOG(3) << "Shutdown reader";
62+
for (auto& read_thread : read_threads_) {
63+
read_thread->join();
64+
}
65+
read_threads_.clear();
66+
queue_->Close();
67+
}
57 68

58 69
void Start() override {
70+
VLOG(3) << "Start reader";
59 71
queue_->ReOpen();
60-
// for (int i = 0; i < thread_num_; i++) {
61-
// read_threads_.emplace_back(
62-
// new std::thread(std::bind(&CTRReader::ReadThread, this,
63-
// file_list_,
64-
// slots_, batch_size_, queue_)));
65-
// }
72+
for (int i = 0; i < file_groups_.size(); i++) {
73+
read_threads_.emplace_back(new std::thread(std::bind(
74+
&ReadThread, file_groups_[i], slots_, batch_size_, queue_)));
75+
}
66 76
}
67 77

68 78
private:
69-
void ReadThread(const std::vector<std::string>& file_list,
70-
const std::vector<std::string>& slots, int batch_size,
71-
std::shared_ptr<LoDTensorBlockingQueue> queue);
79+
void SplitFiles() {
80+
file_groups_.resize(file_list_.size() > thread_num_ ? thread_num_
81+
: file_list_.size());
82+
for (int i = 0; i < file_list_.size(); ++i) {
83+
file_groups_[i % thread_num_].push_back(file_list_[i]);
84+
}
85+
}
72 86

73 87
private:
88+
const int thread_num_;
89+
const int batch_size_;
90+
const std::vector<std::string> slots_;
91+
const std::vector<std::string> file_list_;
74 92
std::shared_ptr<LoDTensorBlockingQueue> queue_;
75 93
std::vector<std::unique_ptr<std::thread>> read_threads_;
76-
int thread_num_;
77-
int batch_size_;
78-
std::vector<std::string> slots_;
79-
std::vector<std::string> file_list_;
94+
std::vector<std::vector<std::string>> file_groups_;
80 95
};
81 96

82 97
} // namespace reader

0 commit comments

Comments
 (0)