Skip to content

Commit 9a49abe

Browse files
committed
return pair
1 parent d7a2094 commit 9a49abe

File tree

5 files changed

+15
-10
lines changed

5 files changed

+15
-10
lines changed

cpp/include/ioutils.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class IoUtils {
3232
~IoUtils();
3333
bool Init(std::string opt_path);
3434
int LoadStreamFile(std::string filepath);
35-
int ReadStreamForVocab(int num_lines);
35+
std::pair<int, bool> ReadStreamForVocab(int num_lines);
3636
void GetWordVocab(int min_count);
3737
private:
3838
void ParseLine(std::string line, std::vector<std::string>& line_vec);

cpp/src/ioutils.cc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,20 +59,25 @@ int IoUtils::LoadStreamFile(std::string filepath) {
5959
return count;
6060
}
6161

62-
int IoUtils::ReadStreamForVocab(int num_lines) {
62+
std::pair<int, bool> IoUtils::ReadStreamForVocab(int num_lines) {
6363
int read_cnt = 0;
6464
std::string line;
6565
std::vector<std::string> line_vec;
66-
while (getline(stream_fin_, line) and read_cnt < num_lines) {
66+
while (not stream_fin_.eof() and read_cnt < num_lines) {
67+
getline(stream_fin_, line);
6768
ParseLine(line, line_vec);
6869
for (auto& word: line_vec) {
6970
if (not word_count_.count(word)) word_count_[word] = 0;
7071
word_count_[word]++;
7172
}
7273
read_cnt++;
7374
}
74-
if (read_cnt < num_lines) stream_fin_.close();
75-
return read_cnt;
75+
bool finished = false;
76+
if (stream_fin_.eof()) {
77+
stream_fin_.close();
78+
finished = true;
79+
}
80+
return {read_cnt, finished};
7681
}
7782

7883
void IoUtils::GetWordVocab(int min_count) {

cusim/ioutils/bindings.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class IoUtilsBind {
2727
return obj_.LoadStreamFile(filepath);
2828
}
2929

30-
int ReadStreamForVocab(int num_lines) {
30+
std::pair<int, bool> ReadStreamForVocab(int num_lines) {
3131
return obj_.ReadStreamForVocab(num_lines);
3232
}
3333

cusim/ioutils/pyioutils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ def load_stream_vocab(self, filepath, min_count, chunk_lines=100000):
3333
full_num_lines = self.obj.load_stream_file(filepath)
3434
pbar = tqdm.trange(full_num_lines)
3535
while True:
36-
num_lines = self.obj.read_stream_for_vocab(chunk_lines)
36+
num_lines, finished = self.obj.read_stream_for_vocab(chunk_lines)
3737
pbar.update(num_lines)
38-
if num_lines < chunk_lines:
39-
pbar.close()
38+
if finished:
4039
break
40+
pbar.close()
4141
self.obj.get_word_vocab(min_count)

examples/example1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def download():
3333
def run():
3434
download()
3535
iou = IoUtils()
36-
iou.load_stream_vocab(DATA_PATH, 5)
36+
iou.load_stream_vocab(DATA_PATH, 5, 10000)
3737

3838

3939
if __name__ == "__main__":

0 commit comments

Comments
 (0)