Skip to content

Commit d9303ef

Browse files
committed
stash changes
Committed-by: xiaolei.zl@alibaba-inc.com from Dev container
1 parent f9a1522 commit d9303ef

File tree

5 files changed

+198
-6
lines changed

5 files changed

+198
-6
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
name: modern_graph # there must then be a modern dir under the ${data} directory
2+
version: v0.1
3+
store_type: mutable_csr # v6d, groot, gart
4+
description: A graph with 1 vertex type and 1 edge type
5+
schema:
6+
vertex_types:
7+
- type_id: 0
8+
type_name: vertex
9+
properties:
10+
- property_id: 0
11+
property_name: id
12+
property_type:
13+
primitive_type: DT_SIGNED_INT64
14+
primary_keys:
15+
- id
16+
edge_types:
17+
- type_id: 0
18+
type_name: edge
19+
vertex_type_pair_relations:
20+
- source_vertex: vertex
21+
destination_vertex: vertex
22+
relation: MANY_TO_MANY
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
graph: modern_graph
2+
loading_config:
3+
data_source:
4+
scheme: file # file, oss, s3, hdfs; only file is supported now
5+
#location: /home/graphscope/gs_interactive_default_graph/
6+
import_option: init # append, overwrite, only init is supported now
7+
format:
8+
type: csv
9+
metadata:
10+
delimiter: "," # other loading configuration options go here
11+
header_row: false # whether to use the first row as the header
12+
quoting: false
13+
quote_char: '"'
14+
double_quote: true
15+
escape_char: '\'
16+
escaping: false
17+
block_size: 4MB
18+
batch_reader: true
19+
null_values: [""]
20+
21+
vertex_mappings:
22+
- type_name: vertex # must align with the schema
23+
inputs:
24+
- vertices.csv
25+
edge_mappings:
26+
- type_triplet:
27+
edge: edge
28+
source_vertex: vertex
29+
destination_vertex: vertex
30+
inputs:
31+
- edges.csv
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python3
"""Split a whitespace-separated edge list into a vertex file and an edge file.

Usage: python3 preprocess.py <file> <vertex_file> <edge_file>

Input lines that start with ``#`` are ignored, as are lines that do not consist
of exactly two whitespace-separated tokens. Every remaining line is read as
``<source> <destination>`` with integer ids.

Output:
    <vertex_file>: every distinct vertex id, one per line, in ascending order.
    <edge_file>:   one ``source,destination`` line per input edge, input order.
"""

import os
import sys


def read_edge_list(file_path):
    """Parse the edge list at ``file_path``.

    Returns:
        (vertices, edges): ``vertices`` is a sorted list of the distinct
        integer ids seen; ``edges`` is a list of ``(source, destination)``
        integer tuples in input order.

    Raises:
        ValueError: a two-token line contains a token that is not an integer.
    """
    vertices = set()
    edges = []
    with open(file_path, "r") as file:
        for line_no, line in enumerate(file, start=1):
            stripped = line.strip()
            # Skip blank lines and comments (including indented comments).
            if not stripped or stripped.startswith("#"):
                continue
            parts = stripped.split()
            # Only lines with exactly two tokens describe an edge.
            if len(parts) != 2:
                continue
            try:
                source, destination = int(parts[0]), int(parts[1])
            except ValueError as exc:
                raise ValueError(
                    "%s:%d: expected two integer ids, got %r"
                    % (file_path, line_no, stripped)
                ) from exc
            vertices.add(source)
            vertices.add(destination)
            # Store the parsed ints, not the raw tokens, so the ids written to
            # the edge file always match the ids written to the vertex file
            # (e.g. "007" must become 7 in both places).
            edges.append((source, destination))
    return sorted(vertices), edges


def write_vertices(vertex_file_path, vertices):
    """Write one vertex id per line to ``vertex_file_path``."""
    with open(vertex_file_path, "w") as file:
        file.writelines("%d\n" % vertex for vertex in vertices)


def write_edges(edge_file_path, edges):
    """Write one ``source,destination`` line per edge to ``edge_file_path``."""
    with open(edge_file_path, "w") as file:
        file.writelines("%d,%d\n" % edge for edge in edges)


def main(argv):
    """Run the conversion; ``argv`` has the same layout as ``sys.argv``.

    Returns the process exit code: 1 on a usage error, 0 on success.
    """
    if len(argv) != 4:
        print("Usage: python3 preprocess.py <file> <vertex_file> <edge_file>")
        return 1
    file_path, vertex_file_path, edge_file_path = argv[1:4]
    vertices, edges = read_edge_list(file_path)
    write_vertices(vertex_file_path, vertices)
    write_edges(edge_file_path, edges)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
41+
42+
43+
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#!/usr/bin/env python3
"""Compare an actual clustering against an expected clustering.

Usage: sort_and_compare.py <actual> <expected>

Both files hold one row per vertex with two integer columns: the vertex id
first, then the cluster id. The actual file is read as space-delimited and the
expected file as tab-delimited.

Cluster ids are not required to be equal across the two files. Instead, each
actual cluster is mapped to the expected cluster of the first of its vertices
that is encountered, and every later vertex of that actual cluster whose
expected cluster differs from the mapping is counted as a mismatch.
"""
import sys
import csv


def read_clusters(path, delimiter):
    """Read a ``vertex_id<delimiter>cluster_id`` file.

    Returns:
        (assignment, sizes): ``assignment`` maps vertex id -> cluster id (the
        last row wins for a repeated vertex id); ``sizes`` maps cluster id ->
        number of rows that named that cluster.
    """
    assignment = {}
    sizes = {}
    with open(path, 'r') as f:
        for row in csv.reader(f, delimiter=delimiter):
            # csv.reader yields [] for blank lines; skip them instead of
            # failing with IndexError.
            if not row:
                continue
            vertex_id = int(row[0])
            cluster_id = int(row[1])
            assignment[vertex_id] = cluster_id
            sizes[cluster_id] = sizes.get(cluster_id, 0) + 1
    return assignment, sizes


def size_range(sizes):
    """Return ``(min, max)`` of the cluster sizes, or ``(0, 0)`` if there are none."""
    if not sizes:
        return 0, 0
    values = sizes.values()
    return min(values), max(values)


def compare_clusterings(actual, actual_cluster_map, expected):
    """Match actual clusters to expected clusters and collect statistics.

    Args:
        actual: vertex id -> actual cluster id.
        actual_cluster_map: actual cluster id -> cluster size.
        expected: vertex id -> expected cluster id.

    Returns:
        A dict with the keys:
            "mapping":        actual cluster id -> expected cluster id it was
                              mapped to.
            "mismatches":     number of vertices whose expected cluster differs
                              from the one their actual cluster is mapped to.
            "missing":        vertex ids present in ``actual`` but not in
                              ``expected``, in iteration order.
            "large_clusters": number of actual clusters with size >= 2.
            "covered":        set of expected cluster ids that some actual
                              cluster with size >= 2 is mapped to.
    """
    mapping = {}
    mismatches = 0
    missing = []
    for vertex_id, cluster_id in actual.items():
        if vertex_id not in expected:
            missing.append(vertex_id)
            continue
        expected_cluster = expected[vertex_id]
        # The first vertex seen for an actual cluster fixes its mapping.
        if mapping.setdefault(cluster_id, expected_cluster) != expected_cluster:
            mismatches += 1

    large_clusters = 0
    covered = set()
    for cluster_id, size in actual_cluster_map.items():
        if size < 2:
            continue
        large_clusters += 1
        # An actual cluster with no vertex in `expected` has no mapping; it
        # must not contribute a None entry to the covered set.
        if cluster_id in mapping:
            covered.add(mapping[cluster_id])

    return {
        "mapping": mapping,
        "mismatches": mismatches,
        "missing": missing,
        "large_clusters": large_clusters,
        "covered": covered,
    }


def main(argv):
    """Run the comparison; ``argv`` has the same layout as ``sys.argv``.

    Returns the process exit code: 1 on a usage error, 0 otherwise.
    """
    # expect two args, the actual file and expected file
    if len(argv) != 3:
        print("Usage: sort_and_compare.py <actual> <expected>")
        return 1

    actual_file = argv[1]
    expected_file = argv[2]

    actual, actual_cluster_map = read_clusters(actual_file, ' ')
    actual_min, actual_max = size_range(actual_cluster_map)
    print("Finished reading actual file, vertices count: ", len(actual), " num clusters ", len(actual_cluster_map), " min cluster size, max cluster size: ", actual_min, actual_max)

    # NOTE: an earlier variant of this script parsed an expected file whose
    # lines looked like "Category:Buprestoidea; 301 302 303 ...", putting all
    # ids after the ';' into one cluster per line. That format is not handled
    # here.
    expected, expected_cluster_map = read_clusters(expected_file, '\t')
    expected_min, expected_max = size_range(expected_cluster_map)
    print("Finished reading expected file, vertices count: ", len(expected), " num clusters", len(expected_cluster_map), " min cluster size, max cluster size: ", expected_min, expected_max)

    stats = compare_clusterings(actual, actual_cluster_map, expected)
    for vertex_id in stats["missing"]:
        print("Vertex id not found in expected file: ", vertex_id)

    # Report how many distinct expected clusters are reached by an actual
    # cluster of size >= 2; the raw count of such actual clusters is printed
    # separately.
    print("Number expect cluster with size >= 2 appeared in actual: ", len(stats["covered"]), " out of ", len(expected_cluster_map))
    print("Number of actual clusters with size >= 2: ", stats["large_clusters"])

    # The first figure counts actual clusters (those that received a mapping),
    # the second counts expected clusters; label them accordingly.
    print("Total cluster id mismatch: ", stats["mismatches"], " out of ", len(expected), "actual cluster number: ", len(stats["mapping"]), " expected cluster number: ", len(expected_cluster_map))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
76+
77+

flex/tests/leiden/test.cc

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@
1717
#include <stdio.h>
1818
#include "include/CPMVertexPartition.h"
1919
#include "include/GraphHelper.h"
20+
#include "include/ModularityVertexPartition.h"
2021
#include "include/Optimiser.h"
22+
#include "include/RBConfigurationVertexPartition.h"
23+
#include "include/RBERVertexPartition.h"
24+
#include "include/SignificanceVertexPartition.h"
2125

2226
#include <glog/logging.h>
2327
#include "flex/engines/graph_db/database/graph_db.h"
@@ -33,28 +37,33 @@ void graph_db_to_igraph(igraph_t* g, gs::GraphDBSession& sess) {
3337
igraph_vector_int_t edges;
3438
igraph_vector_int_init(&edges, 0);
3539
auto& frag = sess.graph();
40+
size_t edges_cnt = 0;
3641
for (gs::vid_t v = 0; v < frag.vertex_num(0); ++v) {
3742
auto oe = frag.get_outgoing_edges_raw(0, v, 0, 0);
3843
while (oe->is_valid()) {
3944
igraph_vector_int_push_back(&edges, v);
4045
igraph_vector_int_push_back(&edges, oe->get_neighbor());
4146
oe->next();
47+
edges_cnt++;
4248
}
4349
}
50+
LOG(INFO) << "Edges count: " << edges_cnt;
4451

45-
igraph_create(g, &edges, 0, true);
52+
igraph_create(g, &edges, frag.vertex_num(0), true);
4653
igraph_vector_int_destroy(&edges);
4754
}
4855

4956
int main(int argc, char** argv) {
5057
// igraph_t g;
5158
// igraph_famous(&g, "Zachary");
52-
if (argc != 3) {
53-
LOG(ERROR) << "Usage: " << argv[0] << "<schema_path> <db_path>";
59+
if (argc != 4) {
60+
LOG(ERROR) << "Usage: " << argv[0]
61+
<< "<schema_path> <db_path> <output_path>";
5462
return 1;
5563
}
5664
std::string schema_path = argv[1];
5765
std::string db_path = argv[2];
66+
std::string output_path = argv[3];
5867

5968
igraph_t g;
6069

@@ -85,12 +94,22 @@ int main(int argc, char** argv) {
8594

8695
o.optimise_partition(&part);
8796

88-
cout << "Node\tCommunity" << endl;
97+
// write to file
98+
// /workspaces/GraphScope/flex/interactive/examples/wiki/leiden_out open file
99+
FILE* f = fopen(output_path.c_str(), "w");
100+
if (f == NULL) {
101+
printf("Error opening file!\n");
102+
exit(1);
103+
}
104+
// cout << "Node\tCommunity" << endl;
105+
cout << "Number of communities: " << part.n_communities() << endl;
89106
auto txn = sess.GetReadTransaction();
90107
for (size_t i = 0; i < graph.vcount(); i++) {
91-
cout << part.membership(i) << "," << txn.GetVertexId(0, i).to_string()
92-
<< endl;
108+
// cout << i << "\t" << part.membership[i] << endl;
109+
fprintf(f, "%zu\t%zu\n", txn.GetVertexId(0, i).AsInt64(),
110+
part.membership()[i]);
93111
}
112+
fclose(f);
94113

95114
igraph_destroy(&g);
96115

0 commit comments

Comments
 (0)