Skip to content

Commit ba310c9

Browse files
author
Giorgi Lomia
committed
Added a way to compute property bit width.
1 parent e7fb031 commit ba310c9

File tree

1 file changed

+57
-30
lines changed

1 file changed

+57
-30
lines changed

tools/graph-stats/graph-memory-stats.cpp

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,13 @@ PrintMapping(const std::unordered_map<std::string, int64_t>& u) {
8989
}
9090
}
9191

92+
/// Prints every entry of a string-to-string map to standard output, one
/// "key : value" line per entry.
///
/// @param u Map whose entries are printed. Entries are emitted in the map's
///          own iteration order, which std::unordered_map leaves unspecified,
///          so the line order may differ between runs or library versions.
void
PrintStringMapping(const std::unordered_map<std::string, std::string>& u) {
  for (const auto& [key, value] : u) {
    std::cout << key << " : " << value << "\n";
  }
}
98+
9299
void
93100
InsertPropertyTypeMemoryData(
94101
const std::unique_ptr<katana::PropertyGraph>& g,
@@ -112,15 +119,19 @@ InsertPropertyTypeMemoryData(
112119
void
113120
doNonGroupingAnalysis(const std::unique_ptr<katana::PropertyGraph> graph) {
114121
using map_element = std::unordered_map<std::string, int64_t>;
115-
using memory_map = std::unordered_map<std::string, map_element>;
122+
using map_string_element = std::unordered_map<std::string, std::string>;
123+
using memory_map = std::unordered_map<
124+
std::string, std::variant<map_element, map_string_element>>;
116125
memory_map mem_map = {};
117126
map_element basic_raw_stats = {};
118127
auto node_schema = graph->full_node_schema();
119128
auto edge_schema = graph->full_edge_schema();
120129
int64_t total_num_node_props = node_schema->num_fields();
121130
int64_t total_num_edge_props = edge_schema->num_fields();
122131

123-
std::cout << "\n";
132+
// arrow::DataType TypeMap;
133+
134+
// std::cout << static_cast<TypeMap>(0) << "\n";
124135

125136
basic_raw_stats.insert(std::pair("Node-Schema-Size", total_num_node_props));
126137
basic_raw_stats.insert(std::pair("Edge-Schema-Size", total_num_edge_props));
@@ -136,65 +147,81 @@ doNonGroupingAnalysis(const std::unique_ptr<katana::PropertyGraph> graph) {
136147
basic_raw_stats.insert(std::pair("Number-Edges", graph->num_edges()));
137148

138149
PrintMapping(basic_raw_stats);
150+
mem_map.insert(std::pair("General-Stats", basic_raw_stats));
139151

140152
auto atomic_node_types = graph->ListAtomicNodeTypes();
141153

142154
auto atomic_edge_types = graph->ListAtomicEdgeTypes();
143155

144-
// std::cout << "Node Types<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
145-
// PrintAtomicTypes(atomic_node_types);
146-
// std::cout << "Edge Types<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n";
147-
// PrintAtomicTypes(atomic_edge_types);
148-
149156
const katana::GraphTopology& g_topo = graph->topology();
150157

151-
auto node_iterator = g_topo.all_nodes();
152-
auto edge_iterator = g_topo.all_edges();
153-
map_element all_node_prop_stats;
154-
map_element all_edge_prop_stats;
158+
map_string_element all_node_prop_stats;
159+
map_string_element all_edge_prop_stats;
160+
map_element all_node_width_stats;
161+
map_element all_edge_width_stats;
162+
163+
all_node_prop_stats.insert(std::pair("kUnknownName", "uint8"));
164+
all_edge_prop_stats.insert(std::pair("kUnknownName", "uint8"));
165+
166+
all_node_width_stats.insert(std::pair("kUnknownName", sizeof(uint8_t) * 8));
167+
all_edge_width_stats.insert(std::pair("kUnknownName", sizeof(uint8_t) * 8));
155168

156169
std::cout << "\n";
157170
std::cout << "Node Schema\n";
158171
std::cout << "---------------------------------------\n";
159172

160173
for (int32_t i = 0; i < node_schema->num_fields(); ++i) {
161174
std::string prop_name = node_schema->field(i)->name();
162-
auto dtype = node_schema->field(i)->type()->name();
163-
int64_t prop_size = sizeof(node_schema->field(i)->type()->name());
164-
std::cout << prop_name << " : " << dtype << "\n";
165-
all_node_prop_stats.insert(std::pair(prop_name, prop_size));
175+
auto dtype = node_schema->field(i)->type();
176+
177+
all_node_width_stats.insert(
178+
std::pair(prop_name, arrow::bit_width(dtype->id())));
179+
all_node_prop_stats.insert(std::pair(prop_name, dtype->name()));
166180
}
167181

168-
// PrintMapping(all_node_prop_stats);
182+
PrintStringMapping(all_node_prop_stats);
183+
PrintMapping(all_node_width_stats);
184+
mem_map.insert(std::pair("Node-Types", all_node_prop_stats));
169185

170186
std::cout << "\n";
171187
std::cout << "Edge Schema\n";
172-
std::cout << static_cast<arrow::Type::type>(0) << "\n";
173188
std::cout << "----------------------------------------\n";
174189

175190
for (int32_t i = 0; i < edge_schema->num_fields(); ++i) {
176191
std::string prop_name = edge_schema->field(i)->name();
177-
auto dtype = edge_schema->field(i)->type()->name();
178-
int64_t prop_size = sizeof(edge_schema->field(i)->type()->name());
179-
std::cout << prop_name << " : " << dtype << "\n";
180-
all_node_prop_stats.insert(std::pair(prop_name, prop_size));
192+
auto dtype = edge_schema->field(i)->type();
193+
194+
all_edge_width_stats.insert(
195+
std::pair(prop_name, arrow::bit_width(dtype->id())));
196+
all_edge_prop_stats.insert(std::pair(prop_name, dtype->name()));
181197
}
182-
PrintMapping(all_edge_prop_stats);
183198

199+
PrintStringMapping(all_edge_prop_stats);
200+
PrintMapping(all_edge_width_stats);
201+
mem_map.insert(std::pair("Edge-Types", all_edge_prop_stats));
202+
203+
auto node_iterator = g_topo.all_nodes();
204+
// auto edge_iterator = g_topo.all_edges();
205+
206+
int64_t width;
184207
std::cout << "\n";
185208
int64_t node_size = 0;
186209
for (auto node : node_iterator) {
187-
auto node_type = graph->GetTypeOfNode(node);
188-
node_size += sizeof(node_type);
210+
std::string node_type = *graph->GetNodeAtomicTypeName(node);
211+
width = all_node_width_stats.find(node_type)->second;
212+
// std::cout << node_type << " : " << width << " ";
213+
node_size += width;
189214
}
190215
std::cout << "Total Number of bytes taken up by Nodes: " << node_size << "\n";
191216

192-
int64_t edge_size = 0;
193-
for (auto edge : edge_iterator) {
194-
auto edge_type = graph->GetTypeOfEdge(edge);
195-
edge_size += sizeof(edge_type);
196-
}
197-
std::cout << "Total Number of bytes taken up by Edges: " << edge_size << "\n";
217+
// int64_t edge_size = 0;
218+
// for (auto edge : edge_iterator) {
219+
// auto edge_type = graph->GetTypeOfEdge(edge);
220+
// width = all_edge_width_stats.find(edge_type)->second;
221+
// edge_size += width;
222+
// std::cout << width << " ";
223+
// }
224+
// std::cout << "Total Number of bytes taken up by Edges: " << edge_size << "\n";
198225
}
199226

200227
int

0 commit comments

Comments
 (0)