Skip to content

Commit 09e3354

Browse files
authored
Periodic iterate module (#221)
Add periodic module for executing batches of commands over the input data
1 parent f8e0db9 commit 09e3354

File tree

26 files changed

+434
-8
lines changed

26 files changed

+434
-8
lines changed

.gitmodules

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
[submodule "cpp/memgraph"]
22
path = cpp/memgraph
33
url = https://github.com/memgraph/memgraph.git
4-
branch = release/2.8
4+
branch = fix-creating-mgp-value-from-nullptr

Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ RUN curl https://sh.rustup.rs -sSf | sh -s -- -y \
5353
&& python3 /mage/setup build -p /usr/lib/memgraph/query_modules/
5454

5555
#DGL build from source
56-
5756
RUN git clone --recurse-submodules -b 0.9.x https://github.com/dmlc/dgl.git \
5857
&& cd dgl && mkdir build && cd build && cmake .. \
5958
&& make -j4 && cd ../python && python3 setup.py install

cpp/CMakeLists.txt

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Memgraph Mage C++ Query Modules CMake configuration.
2-
cmake_minimum_required(VERSION 3.9)
2+
cmake_minimum_required(VERSION 3.14)
33
set(MEMGRAPH_MAGE_PROJECT_NAME "memgraph-mage")
44
project("${MEMGRAPH_MAGE_PROJECT_NAME}" LANGUAGES C CXX)
5+
include(FetchContent)
56

67
# setup CMake module path, defines path for include() and find_package()
78
# https://cmake.org/cmake/help/latest/variable/CMAKE_MODULE_PATH.html
@@ -81,9 +82,39 @@ target_include_directories(mage_uuid INTERFACE ${UUID_INCLUDE_DIR}/gsl)
8182
target_include_directories(mage_uuid INTERFACE ${UUID_INCLUDE_DIR})
8283
add_dependencies(mage_uuid uuid-proj)
8384

85+
# Add mgclient
86+
set(MGCLIENT_ROOT ${PROJECT_BINARY_DIR}/mgclient)
87+
ExternalProject_Add(mgclient-proj
88+
PREFIX ${MGCLIENT_ROOT}
89+
INSTALL_DIR ${MGCLIENT_ROOT}
90+
GIT_REPOSITORY https://github.com/memgraph/mgclient.git
91+
GIT_TAG T634-FL-add-column-names-to-cpp-client
92+
CMAKE_ARGS "-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
93+
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
94+
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
95+
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
96+
"-DCMAKE_POSITION_INDEPENDENT_CODE=ON"
97+
"-DBUILD_CPP_BINDINGS=ON"
98+
${MACOSX_OPENSSL_ROOTDIR_FLAG})
99+
100+
set(MGCLIENT_INCLUDE_DIRS ${MGCLIENT_ROOT}/include ${MGCLIENT_ROOT}/mgclient_cpp/include)
101+
set(MGCLIENT_LIBRARY_PATH ${MGCLIENT_ROOT}/lib/${CMAKE_FIND_LIBRARY_PREFIXES}mgclient.a)
102+
103+
add_library(mgclient STATIC IMPORTED)
104+
set_target_properties(mgclient PROPERTIES
105+
IMPORTED_LOCATION "${MGCLIENT_LIBRARY_PATH}"
106+
INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
107+
add_dependencies(mgclient mgclient-proj)
108+
84109
# mg_utility library
85110
add_subdirectory(mg_utility)
86111

112+
FetchContent_Declare(fmt
113+
GIT_REPOSITORY https://github.com/fmtlib/fmt.git
114+
GIT_TAG 9.1.0
115+
)
116+
FetchContent_MakeAvailable(fmt)
117+
87118
function(add_query_module target_name version src)
88119
add_library(${target_name} SHARED ${src})
89120
set_target_properties(${target_name} PROPERTIES SOVERSION "${version}")
@@ -107,4 +138,5 @@ add_subdirectory(degree_centrality_module)
107138
add_subdirectory(graph_util_module)
108139
add_subdirectory(node_similarity_module)
109140
add_subdirectory(distance_calculator)
141+
add_subdirectory(periodic_module)
110142
add_cugraph_subdirectory(cugraph_module)

cpp/memgraph

Submodule memgraph updated 389 files

cpp/periodic_module/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
include(GNUInstallDirs)
2+
3+
# Add all module files related to graph util module
4+
set(periodic_iterate_src
5+
periodic_iterate.cpp)
6+
7+
add_query_module(periodic 1 "${periodic_iterate_src}")
8+
9+
target_compile_definitions(periodic PRIVATE MGCLIENT_STATIC_DEFINE)
10+
target_include_directories(periodic PRIVATE ${MGCLIENT_INCLUDE_DIRS})
11+
target_link_libraries(periodic PRIVATE mgclient fmt::fmt)
Lines changed: 311 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,311 @@
1+
#include <charconv>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>

#include <fmt/core.h>
#include <mgp.hpp>

#include "mgclient.hpp"
5+
6+
// All constants are constexpr so the pointers themselves are immutable and have
// internal linkage; nothing outside this translation unit can rebind or clash with them.

// Name under which the procedure is registered, and the names of its arguments.
constexpr const char *kProcedurePeriodic = "iterate";
constexpr const char *kArgumentInputQuery = "input_query";
constexpr const char *kArgumentRunningQuery = "running_query";
constexpr const char *kArgumentConfig = "config";

// Key looked up in the config map to obtain the batch size.
constexpr const char *kConfigKeyBatchSize = "batch_size";

// Query parameter that carries a whole batch, and the alias each row gets when it is unwound.
constexpr const char *kBatchInternalName = "__batch";
constexpr const char *kBatchRowInternalName = "__batch_row";

// Names of the fields in the procedure's result record.
constexpr const char *kReturnSuccess = "success";
constexpr const char *kReturnNumBatches = "number_of_executed_batches";

// Environment variables consulted for the client connection settings.
constexpr const char *kMgHost = "MG_HOST";
constexpr const char *kMgPort = "MG_PORT";
constexpr const char *kMgUsername = "MG_USERNAME";
constexpr const char *kMgPassword = "MG_PASSWORD";

// Connection settings used when the corresponding environment variable is not set.
constexpr const char *kDefaultHost = "localhost";
constexpr uint16_t kDefaultPort = 7687;
23+
24+
/// Column names of an input-query row, grouped by the kind of value each column holds.
struct ParamNames {
  /// Columns whose value is a node.
  std::vector<std::string> node_names;
  /// Columns whose value is a relationship.
  std::vector<std::string> relationship_names;
  /// Columns holding any other kind of value.
  std::vector<std::string> primitive_names;
};
29+
30+
/// Sorts the column names into node, relationship and primitive groups according to the
/// type of the value found at the same position in `batch_row`.
///
/// `batch_row` is indexed with every position of `columns`, so it must hold at least
/// `columns.size()` values.
ParamNames ExtractParamNames(const std::vector<std::string> &columns, const std::vector<mg::Value> &batch_row) {
  ParamNames grouped;

  for (size_t idx = 0; idx < columns.size(); ++idx) {
    const auto &column = columns[idx];
    switch (batch_row[idx].type()) {
      case mg::Value::Type::Node:
        grouped.node_names.push_back(column);
        break;
      case mg::Value::Type::Relationship:
        grouped.relationship_names.push_back(column);
        break;
      default:
        grouped.primitive_names.push_back(column);
        break;
    }
  }

  return grouped;
}
44+
45+
/// Concatenates `strings`, placing `delimiter` between consecutive elements.
///
/// Returns an empty string when `strings` is empty. The result buffer is sized once up
/// front and filled by appending in place, so no intermediate strings are created.
std::string Join(const std::vector<std::string> &strings, const std::string &delimiter) {
  if (strings.empty()) {
    return "";
  }

  // Accumulate in size_t: a plain `auto total = 0` would deduce int and narrow the sizes.
  std::size_t total_size = delimiter.size() * (strings.size() - 1);
  for (const auto &piece : strings) {
    total_size += piece.size();
  }

  std::string joined;
  joined.reserve(total_size);

  joined += strings.front();
  for (std::size_t i = 1; i < strings.size(); ++i) {
    joined += delimiter;
    joined += strings[i];
  }

  return joined;
}
65+
66+
/// Builds the projection `<internal_name>.<entity_name> AS __<entity_name>_id` used to carry
/// a node or relationship id out of the unwound batch row.
///
/// Both names are inserted verbatim, without quoting or escaping.
std::string GetGraphFirstClassEntityAlias(const std::string &internal_name, const std::string &entity_name) {
  return internal_name + "." + entity_name + " AS __" + entity_name + "_id";
}
69+
70+
/// Builds the projection `<internal_name>.<primitive_name> AS <primitive_name>` that exposes a
/// non-graph value from the unwound batch row under its original column name.
///
/// Both names are inserted verbatim, without quoting or escaping.
std::string GetPrimitiveEntityAlias(const std::string &internal_name, const std::string &primitive_name) {
  return internal_name + "." + primitive_name + " AS " + primitive_name;
}
73+
74+
std::string ConstructWithStatement(const ParamNames &names) {
75+
std::vector<std::string> with_entity_vector;
76+
for (const auto &node_name : names.node_names) {
77+
with_entity_vector.emplace_back(GetGraphFirstClassEntityAlias(kBatchRowInternalName, node_name));
78+
}
79+
for (const auto &rel_name : names.relationship_names) {
80+
with_entity_vector.emplace_back(GetGraphFirstClassEntityAlias(kBatchRowInternalName, rel_name));
81+
}
82+
for (const auto &prim_name : names.primitive_names) {
83+
with_entity_vector.emplace_back(GetPrimitiveEntityAlias(kBatchRowInternalName, prim_name));
84+
}
85+
86+
return fmt::format("WITH {}", Join(with_entity_vector, ", "));
87+
}
88+
89+
/// Builds a `MATCH` clause that re-binds `node_name` to the node whose id equals the
/// `__<node_name>_id` value. The name is inserted verbatim.
std::string ConstructMatchingNodeById(const std::string &node_name) {
  return "MATCH (" + node_name + ") WHERE ID(" + node_name + ") = __" + node_name + "_id";
}
92+
93+
/// Builds a `MATCH` clause that re-binds `rel_name` to the relationship whose id equals the
/// `__<rel_name>_id` value. The name is inserted verbatim.
std::string ConstructMatchingRelationshipById(const std::string &rel_name) {
  return "MATCH ()-[" + rel_name + "]->() WHERE ID(" + rel_name + ") = __" + rel_name + "_id";
}
96+
97+
std::string ConstructMatchGraphEntitiesById(const ParamNames &names) {
98+
std::string match_string = "";
99+
std::vector<std::string> match_by_id_vector;
100+
for (const auto &node_name : names.node_names) {
101+
match_by_id_vector.emplace_back(ConstructMatchingNodeById(node_name));
102+
}
103+
for (const auto &rel_name : names.relationship_names) {
104+
match_by_id_vector.emplace_back(ConstructMatchingRelationshipById(rel_name));
105+
}
106+
107+
if (match_by_id_vector.size()) {
108+
match_string = Join(match_by_id_vector, " ");
109+
}
110+
111+
return match_string;
112+
}
113+
114+
std::string ConstructQueryPrefix(const ParamNames &names) {
115+
if (!names.node_names.size() && !names.relationship_names.size() && !names.primitive_names.size()) {
116+
return std::string();
117+
}
118+
119+
auto unwind_batch = fmt::format("UNWIND ${} AS {}", kBatchInternalName, kBatchRowInternalName);
120+
auto with_variables = ConstructWithStatement(names);
121+
auto match_string = ConstructMatchGraphEntitiesById(names);
122+
123+
return fmt::format("{} {} {}", unwind_batch, with_variables, match_string);
124+
}
125+
126+
/// Packs a batch of rows into the parameter map sent with the running query.
///
/// The returned map has a single entry, keyed by kBatchInternalName, holding a list with one
/// map per row. In each row map the column name maps to the row's value, except that nodes
/// and relationships are replaced by their integer id.
mg::Map ConstructQueryParams(const std::vector<std::string> &columns, const std::vector<std::vector<mg::Value>> &batch) {
  const auto column_count = columns.size();
  mg::List rows(batch.size());

  for (const auto &batch_row : batch) {
    mg::Map row_map(column_count);

    for (size_t col = 0; col < column_count; ++col) {
      const auto &value = batch_row[col];
      switch (value.type()) {
        case mg::Value::Type::Node:
          row_map.Insert(columns[col], mg::Value(static_cast<int64_t>(value.ValueNode().id().AsInt())));
          break;
        case mg::Value::Type::Relationship:
          row_map.Insert(columns[col], mg::Value(static_cast<int64_t>(value.ValueRelationship().id().AsInt())));
          break;
        default:
          row_map.Insert(columns[col], value);
          break;
      }
    }

    rows.Append(mg::Value(std::move(row_map)));
  }

  mg::Map params(1);
  params.Insert(kBatchInternalName, mg::Value(std::move(rows)));
  return params;
}
153+
154+
/// Returns `prefix_query`, a single space, then `running_query`. The space is emitted even
/// when the prefix is empty.
std::string ConstructFinalQuery(const std::string &running_query, const std::string &prefix_query) {
  return prefix_query + " " + running_query;
}
157+
158+
void ExecuteRunningQuery(const std::string running_query, const std::vector<std::string> &columns,
159+
const std::vector<std::vector<mg::Value>> &batch) {
160+
if (!batch.size()) {
161+
return;
162+
}
163+
164+
auto param_names = ExtractParamNames(columns, batch[0]);
165+
auto prefix_query = ConstructQueryPrefix(param_names);
166+
auto final_query = ConstructFinalQuery(running_query, prefix_query);
167+
168+
auto query_params = ConstructQueryParams(columns, batch);
169+
170+
mg::Client::Params session_params{.host = "localhost", .port = 7687};
171+
auto client = mg::Client::Connect(session_params);
172+
if (!client) {
173+
throw std::runtime_error("Unable to connect to client!");
174+
}
175+
if (!client->Execute(final_query, query_params.AsConstMap())) {
176+
throw std::runtime_error("Error while executing periodic iterate!");
177+
}
178+
179+
client->DiscardAll();
180+
}
181+
182+
/// Checks that the configured batch size is present, is an integer and is strictly positive.
///
/// @param batch_size_value Value read from the config map under kConfigKeyBatchSize.
/// @throws std::runtime_error describing the first violated requirement.
void ValidateBatchSize(const mgp::Value &batch_size_value) {
  if (batch_size_value.IsNull()) {
    throw std::runtime_error(fmt::format("Configuration parameter {} is not set.", kConfigKeyBatchSize));
  }
  if (!batch_size_value.IsInt()) {
    throw std::runtime_error("Batch size not provided as an integer in the periodic iterate configuration!");
  }

  // Zero is rejected as well, so the requirement is "positive", not "non-negative".
  if (batch_size_value.ValueInt() <= 0) {
    throw std::runtime_error("Batch size must be a positive number!");
  }
}
195+
196+
mg::Client::Params GetClientParams() {
197+
auto *host = kDefaultHost;
198+
auto port = kDefaultPort;
199+
auto *username = "";
200+
auto *password = "";
201+
202+
auto *maybe_host = std::getenv(kMgHost);
203+
if (maybe_host) {
204+
host = std::move(maybe_host);
205+
}
206+
207+
const auto *maybe_port = std::getenv(kMgPort);
208+
if (maybe_port) {
209+
port = static_cast<uint16_t>(std::move(*maybe_port));
210+
}
211+
212+
const auto *maybe_username = std::getenv(kMgUsername);
213+
if (maybe_username) {
214+
username = std::move(maybe_username);
215+
}
216+
217+
const auto *maybe_password = std::getenv(kMgPassword);
218+
if (maybe_password) {
219+
password = std::move(maybe_password);
220+
}
221+
222+
return mg::Client::Params{.host = std::move(host),
223+
.port = std::move(port),
224+
.username = std::move(username),
225+
.password = std::move(password)};
226+
}
227+
228+
/// Procedure callback: runs `input_query`, groups its rows into batches of `batch_size`
/// (read from the config map) and executes `running_query` once per batch.
///
/// Arguments are read positionally: input query, running query, config map. The result record
/// reports `success` and `number_of_executed_batches`. Any std::exception raised while
/// processing is turned into an error message plus a record with success = false.
void PeriodicIterate(mgp_list *args, mgp_graph *memgraph_graph, mgp_result *result, mgp_memory *memory) {
  // Pairs mg::Client::Init() with mg::Client::Finalize() on every exit path, including the
  // early return and exceptions, which the straight-line Init/Finalize calls did not cover.
  struct ClientLibraryGuard {
    ClientLibraryGuard() { mg::Client::Init(); }
    ~ClientLibraryGuard() { mg::Client::Finalize(); }
    ClientLibraryGuard(const ClientLibraryGuard &) = delete;
    ClientLibraryGuard &operator=(const ClientLibraryGuard &) = delete;
  };

  mgp::memory = memory;
  const auto arguments = mgp::List(args);

  int64_t num_of_executed_batches = 0;
  const auto record_factory = mgp::RecordFactory(result);
  auto record = record_factory.NewRecord();

  const auto input_query = std::string(arguments[0].ValueString());
  const auto running_query = std::string(arguments[1].ValueString());
  const auto config = arguments[2].ValueMap();

  const auto batch_size_value = config.At(kConfigKeyBatchSize);

  try {
    ValidateBatchSize(batch_size_value);

    // Validated as strictly positive above, so the conversion to size_t is safe.
    const auto batch_size = static_cast<size_t>(batch_size_value.ValueInt());

    // Declared before the client so the client is destroyed before the library is finalized.
    const ClientLibraryGuard library_guard;

    auto client = mg::Client::Connect(GetClientParams());
    if (!client) {
      throw std::runtime_error("Unable to connect to client!");
    }

    if (!client->Execute(input_query)) {
      // Fill both declared result fields, not just `success`.
      record.Insert(kReturnSuccess, false);
      record.Insert(kReturnNumBatches, num_of_executed_batches);
      return;
    }

    const auto columns = client->GetColumns();

    std::vector<std::vector<mg::Value>> batch;
    batch.reserve(batch_size);

    // `maybe_row` is intentionally non-const: moving from a const optional would copy the row.
    while (auto maybe_row = client->FetchOne()) {
      if (maybe_row->empty()) {
        break;
      }

      batch.push_back(std::move(*maybe_row));

      if (batch.size() == batch_size) {
        ExecuteRunningQuery(running_query, columns, batch);
        ++num_of_executed_batches;
        batch.clear();
      }
    }

    // Flush the final, partially filled batch.
    if (!batch.empty()) {
      ExecuteRunningQuery(running_query, columns, batch);
      ++num_of_executed_batches;
    }

    record.Insert(kReturnSuccess, true);
    record.Insert(kReturnNumBatches, num_of_executed_batches);
  } catch (const std::exception &e) {
    record_factory.SetErrorMessage(e.what());
    record.Insert(kReturnSuccess, false);
    record.Insert(kReturnNumBatches, num_of_executed_batches);
  }
}
296+
297+
extern "C" int mgp_init_module(struct mgp_module *module, struct mgp_memory *memory) {
298+
try {
299+
mgp::memory = memory;
300+
mgp::AddProcedure(
301+
PeriodicIterate, kProcedurePeriodic, mgp::ProcedureType::Read,
302+
{mgp::Parameter(kArgumentInputQuery, mgp::Type::String),
303+
mgp::Parameter(kArgumentRunningQuery, mgp::Type::String), mgp::Parameter(kArgumentConfig, mgp::Type::Map)},
304+
{mgp::Return(kReturnSuccess, mgp::Type::Bool), mgp::Return(kReturnNumBatches, mgp::Type::Int)}, module, memory);
305+
} catch (const std::exception &e) {
306+
return 1;
307+
}
308+
return 0;
309+
}
310+
311+
/// Module shutdown hook. Performs no work and always returns 0.
extern "C" int mgp_shutdown_module() {
  return 0;
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
CREATE ();

0 commit comments

Comments
 (0)