Skip to content

Commit 9393d08

Browse files
authored
feat(interactive): Implement oss uploader and downloader for interactive (#4577)
- Introduce `aliyun-oss-cpp-sdk` as a third-party dependency. - Implement the OSS object reader/writer, allowing graph data to be dumped to OSS and loaded from OSS.
1 parent 5bcefbd commit 9393d08

File tree

13 files changed

+783
-1
lines changed

13 files changed

+783
-1
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@
1313
[submodule "flex/third_party/parallel-hashmap"]
1414
path = flex/third_party/parallel-hashmap
1515
url = https://github.com/greg7mdp/parallel-hashmap.git
16+
[submodule "flex/third_party/aliyun-oss-cpp-sdk"]
17+
path = flex/third_party/aliyun-oss-cpp-sdk
18+
url = https://github.com/aliyun/aliyun-oss-cpp-sdk.git

flex/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ option(USE_PTHASH "Whether to use pthash" OFF)
1717
option(OPTIMIZE_FOR_HOST "Whether to optimize on host" ON) # Whether to build optimized code on host
1818
option(USE_STATIC_ARROW "Whether to use static arrow" OFF) # Whether to link arrow statically, default is OFF
1919
option(BUILD_WITH_OTEL "Whether to build with opentelemetry-cpp" OFF) # Whether to build with opentelemetry-cpp, default is OFF
20+
option(BUILD_WITH_OSS "Whether to build with oss support" OFF) # Whether to build with oss support, default is OFF
2021

2122
#print options
2223
message(STATUS "Build test: ${BUILD_TEST}")
@@ -51,6 +52,13 @@ if(LINUX_KERNEL_VERSION VERSION_GREATER_EQUAL "4.5")
5152
add_definitions(-DUSE_COPY_FILE_RANGE)
5253
endif()
5354

55+
if (BUILD_WITH_OSS)
56+
add_definitions(-DBUILD_WITH_OSS)
57+
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/aliyun-oss-cpp-sdk/sdk/include)
58+
set(TARGET_OUTPUT_NAME_PREFIX "alibabacloud-oss-" CACHE STRING "The target's output name prefix")
59+
add_subdirectory(third_party/aliyun-oss-cpp-sdk)
60+
endif()
61+
5462
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../)
5563

5664
if(USE_PTHASH)

flex/bin/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ install(PROGRAMS load_plan_and_gen.sh DESTINATION bin)
4444
include_directories(${Boost_INCLUDE_DIRS})
4545
add_executable(bulk_loader bulk_loader.cc)
4646
target_link_libraries(bulk_loader flex_rt_mutable_graph ${GFLAGS_LIBRARIES})
47+
if (BUILD_WITH_OSS)
48+
target_link_libraries(bulk_loader cpp-sdk)
49+
endif()
4750
install_without_export_flex_target(bulk_loader)
4851

4952
add_executable(stored_procedure_runner stored_procedure_runner.cc)

flex/bin/bulk_loader.cc

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
#include "flex/engines/graph_db/database/graph_db.h"
2525
#include "flex/engines/http_server/options.h"
2626

27+
#ifdef BUILD_WITH_OSS
28+
#include <boost/process.hpp>
29+
#include "flex/utils/remote/oss_storage.h"
30+
#endif
31+
2732
namespace bpo = boost::program_options;
2833

2934
static std::string work_dir;
@@ -44,6 +49,73 @@ void signal_handler(int signal) {
4449
}
4550
}
4651

52+
#ifdef BUILD_WITH_OSS
53+
54+
void check_oss_object_not_exist(std::string& data_path,
55+
std::string& object_path,
56+
gs::OSSConf& oss_conf) {
57+
auto pos = data_path.find("/", 6);
58+
if (pos == std::string::npos) {
59+
LOG(FATAL) << "Invalid data path: " << data_path;
60+
}
61+
oss_conf.bucket_name_ = data_path.substr(6, pos - 6);
62+
object_path = data_path.substr(pos + 1);
63+
oss_conf.load_conf_from_env();
64+
// check whether the object exists
65+
auto oss_reader = std::make_shared<gs::OSSRemoteStorageDownloader>(oss_conf);
66+
if (!oss_reader || !oss_reader->Open().ok()) {
67+
LOG(FATAL) << "Failed to open oss reader";
68+
}
69+
std::vector<std::string> path_list;
70+
auto status = oss_reader->List(object_path, path_list);
71+
if (status.ok() && path_list.size() > 0) {
72+
LOG(FATAL) << "Object already exists: " << object_path
73+
<< ", list size: " << path_list.size()
74+
<< ", please remove the object and try again.";
75+
}
76+
// use a random directory
77+
data_path = "/tmp/" + std::to_string(time(nullptr));
78+
}
79+
80+
int32_t upload_data_dir_to_oss(const std::filesystem::path& data_dir_path,
81+
const std::string& object_path,
82+
const gs::OSSConf& oss_conf) {
83+
// zip the data directory
84+
std::string zip_file = data_dir_path.string() + ".zip";
85+
std::string zip_cmd = "zip -r " + zip_file + " " + data_dir_path.string();
86+
boost::process::child zip_process(zip_cmd);
87+
zip_process.wait();
88+
89+
int res = zip_process.exit_code();
90+
if (res != 0) {
91+
LOG(ERROR) << "Failed to zip data directory: " << zip_cmd
92+
<< ", code: " << res;
93+
return -1;
94+
}
95+
96+
auto oss_writer = std::make_shared<gs::OSSRemoteStorageUploader>(oss_conf);
97+
if (!oss_writer || !oss_writer->Open().ok()) {
98+
LOG(ERROR) << "Failed to open oss writer";
99+
return -1;
100+
}
101+
auto status = oss_writer->Put(zip_file, object_path, false);
102+
if (!status.ok()) {
103+
LOG(ERROR) << "Failed to upload data to oss: " << status.ToString();
104+
return -1;
105+
}
106+
status = oss_writer->Close();
107+
if (!status.ok()) {
108+
LOG(ERROR) << "Failed to close oss writer: " << status.ToString();
109+
return -1;
110+
}
111+
LOG(INFO) << "Successfully uploaded data to oss: " << object_path
112+
<< ", it is in zip format";
113+
std::filesystem::remove(zip_file);
114+
std::filesystem::remove_all(data_dir_path);
115+
return 0;
116+
}
117+
#endif
118+
47119
int main(int argc, char** argv) {
48120
bpo::options_description desc("Usage:");
49121
/**
@@ -90,6 +162,17 @@ int main(int argc, char** argv) {
90162
}
91163

92164
std::string data_path = "";
165+
/**
166+
* If the data path is an oss path, the data will be uploaded to oss after
167+
* loading to a temporary directory. To improve upload performance,
168+
* bulk_loader will zip the data directory before uploading.
169+
* The data path should be in the format of oss://bucket_name/object_path
170+
*/
171+
#ifdef BUILD_WITH_OSS
172+
bool upload_to_oss = false;
173+
std::string object_path = "";
174+
auto oss_conf = gs::OSSConf();
175+
#endif
93176
std::string bulk_load_config_path = "";
94177
std::string graph_schema_path = "";
95178

@@ -141,6 +224,16 @@ int main(int argc, char** argv) {
141224
vm["use-mmap-vector"].as<bool>());
142225
}
143226

227+
if (data_path.find("oss://") == 0) {
228+
#ifdef BUILD_WITH_OSS
229+
upload_to_oss = true;
230+
check_oss_object_not_exist(data_path, object_path, oss_conf);
231+
#else
232+
LOG(ERROR) << "OSS is not supported in this build";
233+
return -1;
234+
#endif
235+
}
236+
144237
std::filesystem::path data_dir_path(data_path);
145238
if (!std::filesystem::exists(data_dir_path)) {
146239
std::filesystem::create_directory(data_dir_path);
@@ -185,5 +278,11 @@ int main(int argc, char** argv) {
185278
t += grape::GetCurrentTime();
186279
LOG(INFO) << "Finished bulk loading in " << t << " seconds.";
187280

281+
#ifdef BUILD_WITH_OSS
282+
if (upload_to_oss) {
283+
return upload_data_dir_to_oss(data_dir_path, object_path, oss_conf);
284+
}
285+
#endif
286+
188287
return 0;
189288
}

flex/tests/hqps/CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11

22
# file(GLOB_RECURSE GS_TEST_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc")
33
file(GLOB GS_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/*.cc")
4+
if (NOT BUILD_WITH_OSS)
5+
list(REMOVE_ITEM GS_TEST_FILES "oss_test.cc")
6+
endif()
47

58
foreach(f ${GS_TEST_FILES})
69
string(REGEX MATCH "^(.*)\\.[^.]*$" dummy ${f})
710
set(T_NAME ${CMAKE_MATCH_1})
811
message(STATUS "Found graphscope test - " ${T_NAME})
912
add_executable(${T_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/${T_NAME}.cc)
10-
target_link_libraries(${T_NAME} flex_plan_proto flex_graph_db)
13+
target_link_libraries(${T_NAME} flex_plan_proto flex_graph_db flex_utils)
14+
if (BUILD_WITH_OSS)
15+
target_link_libraries(${T_NAME} cpp-sdk)
16+
endif()
1117
endforeach()

flex/tests/hqps/oss_test.cc

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
2+
/** Copyright 2020 Alibaba Group Holding Limited.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "flex/utils/remote/oss_storage.h"
18+
19+
#include <glog/logging.h>
20+
21+
int main(int argc, char** argv) {
22+
if (argc != 6) {
23+
std::cerr << "usage: oss_object_writer <access-key> <access-secret> "
24+
"<endpoint> <bucket> <input-file> "
25+
<< std::endl;
26+
}
27+
28+
gs::OSSConf conf;
29+
conf.accesskey_id_ = argv[1];
30+
conf.accesskey_secret_ = argv[2];
31+
conf.endpoint_ = argv[3];
32+
conf.bucket_name_ = argv[4];
33+
34+
gs::OSSRemoteStorageUploader writer(conf);
35+
gs::OSSRemoteStorageDownloader reader(conf);
36+
37+
CHECK(writer.Open().ok()) << "Open OSS writer failed";
38+
CHECK(reader.Open().ok()) << "Open OSS reader failed";
39+
40+
std::string input_file = argv[5];
41+
std::string object_name = "test_object";
42+
if (writer.Put(input_file, object_name).ok()) {
43+
LOG(INFO) << "Put object " << object_name << " success";
44+
} else {
45+
LOG(ERROR) << "Put object " << object_name << " failed";
46+
}
47+
48+
std::string output_file = "output_file";
49+
if (reader.Get(object_name, output_file).ok()) {
50+
LOG(INFO) << "Get object " << object_name << " success";
51+
} else {
52+
LOG(ERROR) << "Get object " << object_name << " failed";
53+
}
54+
55+
// delete object
56+
if (writer.Delete(object_name).ok()) {
57+
LOG(INFO) << "Delete object " << object_name << " success";
58+
} else {
59+
LOG(ERROR) << "Delete object " << object_name << " failed";
60+
}
61+
62+
return 0;
63+
}
Submodule aliyun-oss-cpp-sdk added at 0632d33

flex/utils/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,21 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/flex
7373

7474
############################################ Build Utils Library ############################################
7575
file(GLOB_RECURSE UTILS_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc")
76+
77+
if (NOT BUILD_WITH_OSS)
78+
list(REMOVE_ITEM UTILS_SRC_FILES "${CMAKE_CURRENT_SOURCE_DIR}/remote/oss_storage.cc")
79+
endif()
80+
81+
7682
add_library(flex_utils SHARED ${UTILS_SRC_FILES} ${ERROR_PROTO_SRCS})
7783
target_include_directories(flex_utils PUBLIC
7884
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
7985
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
8086
# Link the static library of arrow, to save the trouble of linking the shared library of arrow
8187
target_link_libraries(flex_utils PUBLIC ${Protobuf_LIBRARIES} ${ARROW_LIB} ${YAML_CPP_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES})
88+
if (BUILD_WITH_OSS)
89+
target_link_libraries(flex_utils PRIVATE cpp-sdk)
90+
endif()
8291

8392

8493
find_package(mimalloc 1.8 REQUIRED)

flex/utils/file_utils.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/** Copyright 2020 Alibaba Group Holding Limited.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#include "flex/utils/file_utils.h"
17+
18+
namespace gs {
19+
20+
bool read_string_from_file(const std::string& file_path, std::string& content) {
21+
std::ifstream inputFile(file_path);
22+
23+
if (!inputFile.is_open()) {
24+
LOG(ERROR) << "Error: Could not open the file " << file_path;
25+
return false;
26+
}
27+
std::ostringstream buffer;
28+
buffer << inputFile.rdbuf();
29+
content = buffer.str();
30+
return true;
31+
}
32+
33+
bool write_string_to_file(const std::string& content,
34+
const std::string& file_path) {
35+
std::ofstream outputFile(file_path, std::ios::out | std::ios::trunc);
36+
37+
if (!outputFile.is_open()) {
38+
LOG(ERROR) << "Error: Could not open the file " << file_path;
39+
return false;
40+
}
41+
outputFile << content;
42+
return true;
43+
}
44+
45+
} // namespace gs

flex/utils/file_utils.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/** Copyright 2020 Alibaba Group Holding Limited.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
#ifndef UTILS_FILE_UTILS_H_
17+
#define UTILS_FILE_UTILS_H_
18+
19+
#include <fstream>
20+
#include <iostream>
21+
#include <sstream>
22+
#include <string>
23+
24+
#include <glog/logging.h>
25+
26+
namespace gs {
27+
28+
// Reads the entire file at file_path into content. Returns false (and logs an
// error) if the file cannot be opened, true otherwise.
bool read_string_from_file(const std::string& file_path, std::string& content);
29+
30+
// Writes content to the file at file_path, truncating any existing contents.
// Returns false (and logs an error) if the file cannot be opened for writing.
bool write_string_to_file(const std::string& content,
31+
const std::string& file_path);
32+
33+
} // namespace gs
34+
35+
#endif // UTILS_FILE_UTILS_H_

0 commit comments

Comments
 (0)