Skip to content

Commit b4a3d4f

Browse files
committed
add files
1 parent 162c788 commit b4a3d4f

File tree

6 files changed

+193
-17
lines changed

6 files changed

+193
-17
lines changed

cpp/include/ioutils.hpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright (c) 2021 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
#pragma once
7+
8+
#include <set>
9+
#include <random>
10+
#include <memory>
11+
#include <string>
12+
#include <fstream>
13+
#include <utility>
14+
#include <queue>
15+
#include <deque>
16+
#include <functional>
17+
#include <vector>
18+
#include <cmath>
19+
#include <chrono> // NOLINT
20+
#include <iostream>
21+
#include <unordered_map>
22+
23+
// Forward declaration (same form as spdlog's own fwd.h) so this header can
// hold a logger handle without including the spdlog headers itself.
namespace spdlog {
class logger;
}  // namespace spdlog

namespace cusim {

// Reads a text corpus and keeps the resulting vocabulary
// (word -> id map plus the id-ordered word list).
class IoUtils {
 public:
  IoUtils();
  ~IoUtils();

  // Counts the tokens found in the file at `filepath` and keeps every word
  // that occurs at least `min_count` times.
  void LoadGensimVocab(std::string filepath, int min_count);

 private:
  // Splits one line of text into its space-separated tokens.
  std::vector<std::string> parse_line(std::string line);

  std::unordered_map<std::string, int> word_idmap_;  // word -> vocabulary id
  std::vector<std::string> word_list_;               // vocabulary id -> word
  // Logger handle assigned in the constructor and used by the logging macros
  // in the implementation file; it was previously used there but never declared.
  std::shared_ptr<spdlog::logger> logger_;
};  // class IoUtils

}  // namespace cusim

cpp/include/log.hpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Copyright (c) 2020 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
7+
// reference: https://github.com/kakao/buffalo/blob/5f571c2c7d8227e6625c6e538da929e4db11b66d/lib/misc/log.cc
8+
#pragma once
9+
#include <memory>
10+
11+
#define SPDLOG_EOL ""
12+
#define SPDLOG_TRACE_ON
13+
#include "spdlog/spdlog.h"
14+
#include "spdlog/sinks/stdout_color_sinks.h"
15+
16+
// Basename of the current source file: the text after the last '/' in
// __FILE__, or __FILE__ itself when it contains no '/'.
// NOTE(review): relies on strrchr being declared; this header does not
// include <cstring> itself — confirm it is pulled in by the spdlog headers.
#define __FILENAME__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
17+
18+
// Logging macros taking a format string plus at least one format argument.
// Each one forwards to the same-named method of a `logger_` object that must
// be in scope at the call site, prefixing the message with "[file:line] " and
// appending "\n". `x` must be a string literal because it is joined to the
// prefix and newline by literal concatenation.
// With no variadic argument the expansion ends in ", )", so use the *0
// variants below for messages without arguments.
// NOTE(review): every expansion already ends in ';', so `INFO(...);` yields an
// extra empty statement and cannot sit directly between an unbraced `if` and
// its `else`.
#define INFO(x, ...) logger_->info("[{}:{}] " x "\n", __FILENAME__, __LINE__, __VA_ARGS__);
19+
#define DEBUG(x, ...) logger_->debug("[{}:{}] " x "\n", __FILENAME__, __LINE__, __VA_ARGS__);
20+
#define WARN(x, ...) logger_->warn("[{}:{}] " x "\n", __FILENAME__, __LINE__, __VA_ARGS__);
21+
#define TRACE(x, ...) logger_->trace("[{}:{}] " x "\n", __FILENAME__, __LINE__, __VA_ARGS__);
22+
#define CRITICAL(x, ...) logger_->critical("[{}:{}] " x "\n", __FILENAME__, __LINE__, __VA_ARGS__);
23+
24+
// Argument-free counterparts of the macros above: same prefix, newline and
// trailing ';', but the message `x` takes no format arguments.
#define INFO0(x) logger_->info("[{}:{}] " x "\n", __FILENAME__, __LINE__);
25+
#define DEBUG0(x) logger_->debug("[{}:{}] " x "\n", __FILENAME__, __LINE__);
26+
#define WARN0(x) logger_->warn("[{}:{}] " x "\n", __FILENAME__, __LINE__);
27+
#define TRACE0(x) logger_->trace("[{}:{}] " x "\n", __FILENAME__, __LINE__);
28+
#define CRITICAL0(x) logger_->critical("[{}:{}] " x "\n", __FILENAME__, __LINE__);
29+
30+
namespace cusim {
31+
32+
class CuHNSWLogger {
33+
public:
34+
CuHNSWLogger();
35+
std::shared_ptr<spdlog::logger>& get_logger();
36+
void set_log_level(int level);
37+
int get_log_level();
38+
39+
private:
40+
static int global_logging_level_;
41+
std::shared_ptr<spdlog::logger> logger_;
42+
}; // class CuHNSWLogger
43+
44+
} // namespace cusim

cpp/include/types.hpp

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,3 @@
3939
#endif
4040

4141
#define WARP_SIZE 32
42-
43-
// Plain record with a distance, a node id and a flag.
// NOTE(review): presumably one candidate in a nearest-neighbour search
// (distance to the query, graph node id, "already visited" marker) — confirm
// against the code that uses it. `cuda_scalar` is defined outside this view.
struct Neighbor {
44-
cuda_scalar distance;
45-
int nodeid;
46-
bool checked;
47-
};
48-
49-
// Type aliases kept for compatibility with hnswlib.
using tableint = unsigned int;
using sizeint = unsigned int;
using scalar = float;
using labeltype = size_t;
54-
55-
// Unscoped enumeration with implicit values DOT = 0 and L2 = 1.
// NOTE(review): the names suggest dot-product vs. L2 distance — confirm
// against the code that switches on this enum.
enum DIST_TYPE {
56-
DOT,
57-
L2,
58-
};

cpp/src/culda.cu

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Copyright (c) 2020 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
#include "culda.cuh"
7+
8+
namespace cusim {
9+
10+
// Obtains a logger handle from a short-lived CuSimLogger and stores a copy
// of it in logger_.
CuLDA::CuLDA() {
  CuSimLogger logger_source;
  logger_ = logger_source.get_logger();
}
13+
14+
// Nothing to release by hand; the members clean up after themselves.
CuLDA::~CuLDA() = default;
15+
16+
} // namespace cusim

cpp/src/ioutils.cc

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright (c) 2021 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
#include "ioutils.hpp"
#include "log.hpp"
7+
8+
namespace cusim {
9+
10+
// Obtains a logger handle from a short-lived CuSimLogger and stores a copy
// of it in logger_.
IoUtils::IoUtils() {
  CuSimLogger logger_source;
  logger_ = logger_source.get_logger();
}
13+
14+
// Nothing to release by hand; the members clean up after themselves.
IoUtils::~IoUtils() = default;
15+
16+
// Splits `line` on the space character (' ') and returns the tokens in the
// order they appear.
//
// Empty tokens are never emitted: leading, trailing and repeated spaces are
// skipped. Previously a leading or doubled space produced an empty-string
// token, which was then counted as a vocabulary word, while a trailing space
// did not.
// Only ' ' is treated as a separator; other whitespace stays part of a token.
std::vector<std::string> IoUtils::parse_line(std::string line) {
  std::vector<std::string> tokens;
  std::string current;
  for (const char ch : line) {
    if (ch != ' ') {
      current += ch;
      continue;
    }
    // Separator: flush the token collected so far, if there is one.
    if (!current.empty()) {
      tokens.push_back(current);
      current.clear();
    }
  }
  // Flush the final token when the line does not end with a space.
  if (!current.empty()) {
    tokens.push_back(current);
  }
  return tokens;
}
33+
34+
void IoUtils::LoadGensimVocab(std::string filepath, int min_count) {
35+
INFO("read gensim file to generate vocabulary: {}, min_count: {}", filepath, min_count);
36+
std::ifstream fin(filepath.c_str());
37+
std::unordered_map<std::string, int> word_count;
38+
while (not fin.eof()) {
39+
std::string line;
40+
getline(fin, line);
41+
std::vector<std::string> line_vec = parse_line(line);
42+
for (auto& word: line_vec) {
43+
if (not word_count.count(word)) word_count[word] = 0;
44+
word_count[word]++;
45+
}
46+
}
47+
INFO("number of raw words: {}", word_count.size());
48+
word_idmap_.clear();
49+
word_list_.clear();
50+
for (auto& it: word_count) {
51+
if (it.second >= min_count) {
52+
word_idmap_[it.first] = vocab_.size();
53+
word_list_.push_back(it.first);
54+
}
55+
}
56+
INFO("number of words after filtering: {}", word_list_.size());
57+
}
58+
59+
} // namespace cusim

cpp/src/log.cc

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright (c) 2020 Jisang Yoon
2+
// All rights reserved.
3+
//
4+
// This source code is licensed under the Apache 2.0 license found in the
5+
// LICENSE file in the root directory of this source tree.
6+
7+
// reference: https://github.com/kakao/buffalo/blob/5f571c2c7d8227e6625c6e538da929e4db11b66d/lib/misc/log.cc
8+
#include "log.hpp"
9+
10+
11+
namespace cusim {
12+
// Verbosity value shared by all logger instances (static member). The initial
// value 2 is the one set_log_level maps to spdlog's info level.
int CuSimLogger::global_logging_level_ = 2;
13+
14+
// Sets spdlog's global output pattern and takes a handle to spdlog's default
// logger.
// The definition is spelled CuSimLogger::CuSimLogger so that it matches the
// class qualifier; the previous mismatched name was not a valid constructor
// definition.
CuSimLogger::CuSimLogger() {
  spdlog::set_pattern("[%^%-8l%$] %Y-%m-%d %H:%M:%S %v");
  logger_ = spdlog::default_logger();
}
18+
19+
std::shared_ptr<spdlog::logger>& CuHNSWLogger::get_logger() {
20+
return logger_;
21+
}
22+
23+
void CuSimLogger::set_log_level(int level) {
24+
global_logging_level_ = level;
25+
switch (level) {
26+
case 0: spdlog::set_level(spdlog::level::off); break;
27+
case 1: spdlog::set_level(spdlog::level::warn); break;
28+
case 2: spdlog::set_level(spdlog::level::info); break;
29+
case 3: spdlog::set_level(spdlog::level::debug); break;
30+
default: spdlog::set_level(spdlog::level::trace); break;
31+
}
32+
}
33+
34+
// Returns the shared verbosity value: the last value passed to set_log_level,
// or the static member's initial value if it was never called.
int CuSimLogger::get_log_level() {
35+
return global_logging_level_;
36+
}
37+
38+
} // namespace cusim

0 commit comments

Comments
 (0)