Skip to content

Commit b0b5466

Browse files
authored
Added functionality to import/export Graph from/to Binary File (#554)
1 parent 457b27d commit b0b5466

File tree

4 files changed

+686
-2
lines changed

4 files changed

+686
-2
lines changed

include/CXXGraph/Graph/Graph_decl.h

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,39 @@ std::ostream &operator<<(std::ostream &o, const AdjacencyList<T> &adj);
8181
/// Class that implement the Graph. ( This class is not Thread Safe )
8282
template <typename T>
8383
class Graph {
84+
public:
85+
/**
86+
* @brief Write the graph to a binary file
87+
* @param workingDir The parent directory of the output file
88+
* @param fileName The output filename (without extension)
89+
* @param writeNodeFeatures Whether to include node features
90+
* @param writeEdgeWeights Whether to include edge weights
91+
* @return 0 if successful, negative value on error:
92+
* -1: Cannot open file
93+
* -2: Write error
94+
*/
95+
int writeToBinaryFile(const std::string &workingDir,
96+
const std::string &fileName,
97+
bool writeNodeFeatures = false,
98+
bool writeEdgeWeights = true) const;
99+
100+
/**
101+
* @brief Read the graph from a binary file
102+
* @param workingDir The parent directory of the input file
103+
* @param fileName The input filename (without extension)
104+
* @param readNodeFeatures Whether to read node features
105+
* @param readEdgeWeights Whether to read edge weights
106+
* @return 0 if successful, negative value on error:
107+
* -1: Cannot open file
108+
* -2: Invalid file format
109+
* -3: Unsupported version
110+
* -4: Read error
111+
*/
112+
int readFromBinaryFile(const std::string &workingDir,
113+
const std::string &fileName,
114+
bool readNodeFeatures = false,
115+
bool readEdgeWeights = true);
116+
84117
private:
85118
T_EdgeSet<T> edgeSet = {};
86119
T_NodeSet<T> isolatedNodesSet = {};
@@ -104,6 +137,47 @@ class Graph {
104137
int writeToDot(const std::string &workingDir, const std::string &OFileName,
105138
const std::string &graphName) const;
106139
int readFromDot(const std::string &workingDir, const std::string &fileName);
140+
141+
// Binary file format constants
142+
static constexpr uint32_t BINARY_MAGIC_NUMBER = 0x47525048; // "GRPH"
143+
static constexpr uint32_t BINARY_VERSION = 1;
144+
static constexpr uint64_t BINARY_FLAG_HAS_NODE_FEATURES = 0x01;
145+
static constexpr uint64_t BINARY_FLAG_HAS_EDGE_WEIGHTS = 0x02;
146+
147+
// Compile-time predicate: can a value of type U be dumped to / restored from
// a binary stream as a raw block of sizeof(U) bytes?
//
// Primary template: the answer is "no" whenever the specialization below is
// not viable for U.
template <typename U, typename = void>
struct is_binary_serializable : std::false_type {};

// Chosen when the raw-byte write expression is well-formed for U; the result
// is then exactly std::is_trivially_copyable<U>::value.
template <typename U>
struct is_binary_serializable<
    U, std::void_t<decltype(std::declval<std::ofstream &>().write(
           reinterpret_cast<const char *>(&std::declval<const U &>()),
           sizeof(U)))>>
    : std::integral_constant<bool, std::is_trivially_copyable<U>::value> {};
156+
157+
// Helper functions for binary I/O
158+
void writeBinaryString(std::ofstream &out, const std::string &str) const;
159+
std::string readBinaryString(std::ifstream &in) const;
160+
161+
/**
162+
* @brief Write the graph to a binary file
163+
* @param filepath The full path to the output file
164+
* @param writeNodeFeatures Whether to include node features
165+
* @param writeEdgeWeights Whether to include edge weights
166+
* @return 0 if successful, negative value on error
167+
*/
168+
int writeToBinary(const std::string &filepath, bool writeNodeFeatures,
169+
bool writeEdgeWeights) const;
170+
171+
/**
172+
* @brief Read the graph from a binary file
173+
* @param filepath The full path to the input file
174+
* @param readNodeFeatures Whether to read node features
175+
* @param readEdgeWeights Whether to read edge weights
176+
* @return 0 if successful, negative value on error
177+
*/
178+
int readFromBinary(const std::string &filepath, bool readNodeFeatures,
179+
bool readEdgeWeights);
180+
107181
void recreateGraph(
108182
std::unordered_map<std::string, std::pair<std::string, std::string>>
109183
&edgeMap,

include/CXXGraph/Graph/IO/InputOperation_impl.hpp

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,126 @@ int Graph<T>::readFromMTXFile(const std::string &workingDir,
248248
return 0;
249249
}
250250

251+
template <typename T>
252+
int Graph<T>::readFromBinaryFile(const std::string &workingDir,
253+
const std::string &fileName,
254+
bool readNodeFeatures, bool readEdgeWeights) {
255+
std::string filepath = workingDir + "/" + fileName + ".bin";
256+
return readFromBinary(filepath, readNodeFeatures, readEdgeWeights);
257+
}
258+
259+
template <typename T>
260+
int Graph<T>::readFromBinary(const std::string &filepath, bool readNodeFeatures,
261+
bool readEdgeWeights) {
262+
std::ifstream in(filepath, std::ios::binary);
263+
if (!in.is_open()) {
264+
return -1;
265+
}
266+
267+
try {
268+
// Read and verify header
269+
uint32_t magic;
270+
in.read(reinterpret_cast<char *>(&magic), sizeof(magic));
271+
if (magic != BINARY_MAGIC_NUMBER) {
272+
return -2; // Invalid file format
273+
}
274+
275+
uint32_t version;
276+
in.read(reinterpret_cast<char *>(&version), sizeof(version));
277+
if (version != BINARY_VERSION) {
278+
return -3; // Unsupported version
279+
}
280+
281+
uint64_t numNodes, numEdges, flags;
282+
in.read(reinterpret_cast<char *>(&numNodes), sizeof(numNodes));
283+
in.read(reinterpret_cast<char *>(&numEdges), sizeof(numEdges));
284+
in.read(reinterpret_cast<char *>(&flags), sizeof(flags));
285+
286+
bool hasNodeFeatures = (flags & BINARY_FLAG_HAS_NODE_FEATURES) != 0;
287+
bool hasEdgeWeights = (flags & BINARY_FLAG_HAS_EDGE_WEIGHTS) != 0;
288+
289+
// Read nodes
290+
std::unordered_map<std::string, shared<Node<T>>> nodeMap;
291+
for (uint64_t i = 0; i < numNodes; ++i) {
292+
std::string nodeId = readBinaryString(in);
293+
294+
T nodeData{};
295+
if (hasNodeFeatures && readNodeFeatures) {
296+
uint32_t dataSize;
297+
in.read(reinterpret_cast<char *>(&dataSize), sizeof(dataSize));
298+
299+
if (dataSize > 0 && is_binary_serializable<T>::value) {
300+
in.read(reinterpret_cast<char *>(&nodeData), sizeof(T));
301+
}
302+
} else if (hasNodeFeatures) {
303+
// Skip node data if present but not reading
304+
uint32_t dataSize;
305+
in.read(reinterpret_cast<char *>(&dataSize), sizeof(dataSize));
306+
if (dataSize > 0) {
307+
in.seekg(dataSize, std::ios::cur);
308+
}
309+
} else {
310+
uint32_t dataSize;
311+
in.read(reinterpret_cast<char *>(&dataSize), sizeof(dataSize));
312+
}
313+
314+
auto node = std::make_shared<Node<T>>(nodeId, std::move(nodeData));
315+
nodeMap[nodeId] = node;
316+
}
317+
318+
// Read edges
319+
for (uint64_t i = 0; i < numEdges; ++i) {
320+
std::string edgeId = readBinaryString(in);
321+
std::string node1Id = readBinaryString(in);
322+
std::string node2Id = readBinaryString(in);
323+
324+
uint8_t edgeFlags;
325+
in.read(reinterpret_cast<char *>(&edgeFlags), sizeof(edgeFlags));
326+
327+
bool isDirected = (edgeFlags & 0x01) != 0;
328+
bool isWeighted = (edgeFlags & 0x02) != 0;
329+
330+
double weight = 0.0;
331+
if (hasEdgeWeights && isWeighted) {
332+
if (readEdgeWeights) {
333+
in.read(reinterpret_cast<char *>(&weight), sizeof(weight));
334+
} else {
335+
in.seekg(sizeof(double), std::ios::cur);
336+
}
337+
}
338+
339+
auto node1 = nodeMap[node1Id];
340+
auto node2 = nodeMap[node2Id];
341+
342+
shared<Edge<T>> edge;
343+
if (isDirected) {
344+
if (isWeighted && readEdgeWeights) {
345+
edge = std::make_shared<DirectedWeightedEdge<T>>(edgeId, node1, node2,
346+
weight);
347+
} else {
348+
edge = std::make_shared<DirectedEdge<T>>(edgeId, node1, node2);
349+
}
350+
} else {
351+
if (isWeighted && readEdgeWeights) {
352+
edge = std::make_shared<UndirectedWeightedEdge<T>>(edgeId, node1,
353+
node2, weight);
354+
} else {
355+
edge = std::make_shared<UndirectedEdge<T>>(edgeId, node1, node2);
356+
}
357+
}
358+
359+
this->addEdge(edge);
360+
}
361+
362+
in.close();
363+
return 0;
364+
365+
} catch (const std::exception &e) {
366+
in.close();
367+
return -4;
368+
}
369+
}
370+
251371
template <typename T>
252372
int Graph<T>::readFromDot(const std::string &workingDir,
253373
const std::string &fileName) {
@@ -436,5 +556,15 @@ void Graph<T>::recreateGraph(
436556
}
437557
}
438558

559+
// Helper function to read string with length prefix
560+
template <typename T>
561+
std::string Graph<T>::readBinaryString(std::ifstream &in) const {
562+
uint32_t len;
563+
in.read(reinterpret_cast<char *>(&len), sizeof(len));
564+
std::string str(len, '\0');
565+
in.read(&str[0], len);
566+
return str;
567+
}
568+
439569
} // namespace CXXGraph
440-
#endif // __CXXGRAPH_INPUTOPERATION_IMPL_H__
570+
#endif // __CXXGRAPH_INPUTOPERATION_IMPL_H__

include/CXXGraph/Graph/IO/OutputOperation_impl.hpp

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,100 @@ int Graph<T>::writeToMTXFile(const std::string &workingDir,
192192
return 0;
193193
}
194194

195+
template <typename T>
196+
int Graph<T>::writeToBinaryFile(const std::string &workingDir,
197+
const std::string &fileName,
198+
bool writeNodeFeatures,
199+
bool writeEdgeWeights) const {
200+
std::string filepath = workingDir + "/" + fileName + ".bin";
201+
return writeToBinary(filepath, writeNodeFeatures, writeEdgeWeights);
202+
}
203+
204+
template <typename T>
205+
int Graph<T>::writeToBinary(const std::string &filepath, bool writeNodeFeatures,
206+
bool writeEdgeWeights) const {
207+
std::ofstream out(filepath, std::ios::binary);
208+
if (!out.is_open()) {
209+
return -1;
210+
}
211+
212+
try {
213+
// Write header
214+
out.write(reinterpret_cast<const char *>(&BINARY_MAGIC_NUMBER),
215+
sizeof(BINARY_MAGIC_NUMBER));
216+
out.write(reinterpret_cast<const char *>(&BINARY_VERSION),
217+
sizeof(BINARY_VERSION));
218+
219+
auto nodeSet = this->getNodeSet();
220+
auto edgeSet = this->getEdgeSet();
221+
222+
uint64_t numNodes = nodeSet.size();
223+
uint64_t numEdges = edgeSet.size();
224+
uint64_t flags = 0;
225+
226+
if (writeNodeFeatures) flags |= BINARY_FLAG_HAS_NODE_FEATURES;
227+
if (writeEdgeWeights) flags |= BINARY_FLAG_HAS_EDGE_WEIGHTS;
228+
229+
out.write(reinterpret_cast<const char *>(&numNodes), sizeof(numNodes));
230+
out.write(reinterpret_cast<const char *>(&numEdges), sizeof(numEdges));
231+
out.write(reinterpret_cast<const char *>(&flags), sizeof(flags));
232+
233+
// Write nodes
234+
for (const auto &node : nodeSet) {
235+
writeBinaryString(out, node->getUserId());
236+
237+
if (writeNodeFeatures) {
238+
// For trivially copyable types, write directly
239+
if constexpr (is_binary_serializable<T>::value) {
240+
uint32_t dataSize = sizeof(T);
241+
out.write(reinterpret_cast<const char *>(&dataSize),
242+
sizeof(dataSize));
243+
const T &data = node->getData();
244+
out.write(reinterpret_cast<const char *>(&data), sizeof(T));
245+
} else {
246+
// For non-trivially copyable types, write 0 size
247+
uint32_t dataSize = 0;
248+
out.write(reinterpret_cast<const char *>(&dataSize),
249+
sizeof(dataSize));
250+
}
251+
} else {
252+
uint32_t dataSize = 0;
253+
out.write(reinterpret_cast<const char *>(&dataSize), sizeof(dataSize));
254+
}
255+
}
256+
257+
// Write edges
258+
for (const auto &edge : edgeSet) {
259+
writeBinaryString(out, edge->getUserId());
260+
writeBinaryString(out, edge->getNodePair().first->getUserId());
261+
writeBinaryString(out, edge->getNodePair().second->getUserId());
262+
263+
uint8_t edgeFlags = 0;
264+
if (edge->isDirected().has_value() && edge->isDirected().value()) {
265+
edgeFlags |= 0x01;
266+
}
267+
if (edge->isWeighted().has_value() && edge->isWeighted().value()) {
268+
edgeFlags |= 0x02;
269+
}
270+
out.write(reinterpret_cast<const char *>(&edgeFlags), sizeof(edgeFlags));
271+
272+
// Write weight if edge is weighted and we're saving weights
273+
if (writeEdgeWeights && (edgeFlags & 0x02)) {
274+
double weight =
275+
std::dynamic_pointer_cast<const Weighted>(edge)->getWeight();
276+
out.write(reinterpret_cast<const char *>(&weight), sizeof(weight));
277+
}
278+
}
279+
280+
out.close();
281+
return 0;
282+
283+
} catch (const std::exception &e) {
284+
out.close();
285+
return -2;
286+
}
287+
}
288+
195289
template <typename T>
196290
int Graph<T>::writeToDot(const std::string &workingDir,
197291
const std::string &OFileName,
@@ -284,6 +378,15 @@ void Graph<T>::writeGraphToStream(std::ostream &oGraph, std::ostream &oNodeFeat,
284378
}
285379
}
286380

381+
// Helper function to write string with length prefix
382+
template <typename T>
383+
void Graph<T>::writeBinaryString(std::ofstream &out,
384+
const std::string &str) const {
385+
uint32_t len = static_cast<uint32_t>(str.length());
386+
out.write(reinterpret_cast<const char *>(&len), sizeof(len));
387+
out.write(str.c_str(), len);
388+
}
389+
287390
template <typename T>
288391
std::ostream &operator<<(std::ostream &os, const Graph<T> &graph) {
289392
os << "Graph:\n";
@@ -355,4 +458,4 @@ std::ostream &operator<<(std::ostream &os, const AdjacencyList<T> &adj) {
355458
}
356459

357460
} // namespace CXXGraph
358-
#endif // __CXXGRAPH_OUTPUTOPERATION_IMPL_H__
461+
#endif // __CXXGRAPH_OUTPUTOPERATION_IMPL_H__

0 commit comments

Comments
 (0)