Skip to content

Commit f9a1522

Browse files
committed
add tuanhuo-cro related graph def
Committed-by: xiaolei.zl@alibaba-inc.com from Dev container
1 parent 5a5baeb commit f9a1522

File tree

9 files changed

+269
-0
lines changed

9 files changed

+269
-0
lines changed
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
name: wiki # a directory named after this graph must exist under the ${data} directory
2+
store_type: mutable_csr # v6d, groot, gart
3+
schema:
4+
vertex_types:
5+
- type_name: user
6+
type_id: 0
7+
x_csr_params:
8+
max_vertex_num: 5000000
9+
properties:
10+
- property_id: 0
11+
property_name: id
12+
property_type:
13+
primitive_type: DT_SIGNED_INT64
14+
primary_keys:
15+
- id
16+
edge_types:
17+
- type_name: friend
18+
type_id: 0
19+
vertex_type_pair_relations:
20+
- source_vertex: user
21+
destination_vertex: user
22+
relation: MANY_TO_MANY
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
graph: modern_graph
2+
loading_config:
3+
data_source:
4+
scheme: file # file, oss, s3, hdfs; only file is supported now
5+
import_option: init # append, overwrite, only init is supported now
6+
format:
7+
type: csv
8+
metadata:
9+
delimiter: "," # other loading configuration goes here
10+
header_row: false # whether to use the first row as the header
11+
quoting: false
12+
quote_char: '"'
13+
double_quote: true
14+
escape_char: '\'
15+
escaping: false
16+
block_size: 4MB
17+
batch_reader: true
18+
null_values: [""]
19+
20+
vertex_mappings:
21+
- type_name: user # must align with the schema
22+
inputs:
23+
- vertices.csv
24+
column_mappings:
25+
- column:
26+
index: 0 # can be omitted if the index is the same as the property index
27+
property: id
28+
edge_mappings:
29+
- type_triplet:
30+
edge: friend
31+
source_vertex: user
32+
destination_vertex: user
33+
inputs:
34+
- edges.csv
35+
source_vertex_mappings:
36+
- column:
37+
index: 0
38+
destination_vertex_mappings:
39+
- column:
40+
index: 1
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/python3
2+
3+
import os
4+
import sys
5+
6+
def parse_edge_list(lines):
    """Parse a whitespace-separated edge list into vertices and edges.

    Lines starting with "#" are treated as comments and skipped. A line with
    exactly two whitespace-separated fields is an edge "<src> <dst>". Blank
    lines are ignored; any other line is counted as malformed.

    Args:
        lines: iterable of text lines (for example an open text file).

    Returns:
        A tuple (vertices, edges, malformed):
          vertices  -- distinct endpoint ids, in first-appearance order
          edges     -- [src, dst] pairs, in input order
          malformed -- number of non-blank, non-comment lines that did not
                       have exactly two fields
    """
    # A dict preserves insertion order, which makes the vertex file
    # reproducible across runs; iterating a set of str depends on the
    # per-process hash seed and yields a different order each time.
    vertices = {}
    edges = []
    malformed = 0
    for line in lines:
        if line.startswith("#"):
            continue
        parts = line.split()
        if len(parts) == 2:
            vertices[parts[0]] = None
            vertices[parts[1]] = None
            edges.append(parts)
        elif parts:
            # Not blank and not an edge: remember it so it can be reported
            # instead of being dropped silently.
            malformed += 1
    return list(vertices), edges, malformed


if __name__ == "__main__":
    # Expect exactly three arguments: the input edge list and two output paths.
    if len(sys.argv) != 4:
        print("Usage: python3 preprocess.py <file> <vertex_file> <edge_file>")
        sys.exit(1)
    file_path, vertex_file_path, edge_file_path = sys.argv[1:4]

    # Read the whole input first so a missing input file fails before any
    # output file is created.
    with open(file_path, "r") as file:
        vertices, edges, malformed = parse_edge_list(file)
    if malformed:
        print(
            "Warning: skipped %d malformed line(s) in %s" % (malformed, file_path),
            file=sys.stderr,
        )

    # One vertex id per line.
    with open(vertex_file_path, "w") as file:
        file.writelines(vertex + "\n" for vertex in vertices)
    # One "src,dst" pair per line.
    with open(edge_file_path, "w") as file:
        file.writelines(edge[0] + "," + edge[1] + "\n" for edge in edges)
38+
39+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
name: wiki # a directory named after this graph must exist under the ${data} directory
2+
store_type: mutable_csr # v6d, groot, gart
3+
schema:
4+
vertex_types:
5+
- type_name: user
6+
type_id: 0
7+
x_csr_params:
8+
max_vertex_num: 5000000
9+
properties:
10+
- property_id: 0
11+
property_name: id
12+
property_type:
13+
primitive_type: DT_SIGNED_INT64
14+
primary_keys:
15+
- id
16+
edge_types:
17+
- type_name: friend
18+
type_id: 0
19+
vertex_type_pair_relations:
20+
- source_vertex: user
21+
destination_vertex: user
22+
relation: MANY_TO_MANY
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
graph: modern_graph
2+
loading_config:
3+
data_source:
4+
scheme: file # file, oss, s3, hdfs; only file is supported now
5+
import_option: init # append, overwrite, only init is supported now
6+
format:
7+
type: csv
8+
metadata:
9+
delimiter: "," # other loading configuration goes here
10+
header_row: false # whether to use the first row as the header
11+
quoting: false
12+
quote_char: '"'
13+
double_quote: true
14+
escape_char: '\'
15+
escaping: false
16+
block_size: 4MB
17+
batch_reader: true
18+
null_values: [""]
19+
20+
vertex_mappings:
21+
- type_name: user # must align with the schema
22+
inputs:
23+
- vertices.csv
24+
column_mappings:
25+
- column:
26+
index: 0 # can be omitted if the index is the same as the property index
27+
property: id
28+
edge_mappings:
29+
- type_triplet:
30+
edge: friend
31+
source_vertex: user
32+
destination_vertex: user
33+
inputs:
34+
- edges.csv
35+
source_vertex_mappings:
36+
- column:
37+
index: 0
38+
destination_vertex_mappings:
39+
- column:
40+
index: 1
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/bin/python3
2+
3+
import os
4+
import sys
5+
6+
def parse_edge_list(lines):
    """Parse a whitespace-separated edge list into vertices and edges.

    Lines starting with "#" are treated as comments and skipped. A line with
    exactly two whitespace-separated fields is an edge "<src> <dst>". Blank
    lines are ignored; any other line is counted as malformed.

    Args:
        lines: iterable of text lines (for example an open text file).

    Returns:
        A tuple (vertices, edges, malformed):
          vertices  -- distinct endpoint ids, in first-appearance order
          edges     -- [src, dst] pairs, in input order
          malformed -- number of non-blank, non-comment lines that did not
                       have exactly two fields
    """
    # A dict preserves insertion order, which makes the vertex file
    # reproducible across runs; iterating a set of str depends on the
    # per-process hash seed and yields a different order each time.
    vertices = {}
    edges = []
    malformed = 0
    for line in lines:
        if line.startswith("#"):
            continue
        parts = line.split()
        if len(parts) == 2:
            vertices[parts[0]] = None
            vertices[parts[1]] = None
            edges.append(parts)
        elif parts:
            # Not blank and not an edge: remember it so it can be reported
            # instead of being dropped silently.
            malformed += 1
    return list(vertices), edges, malformed


if __name__ == "__main__":
    # Expect exactly three arguments: the input edge list and two output paths.
    if len(sys.argv) != 4:
        print("Usage: python3 preprocess.py <file> <vertex_file> <edge_file>")
        sys.exit(1)
    file_path, vertex_file_path, edge_file_path = sys.argv[1:4]

    # Read the whole input first so a missing input file fails before any
    # output file is created.
    with open(file_path, "r") as file:
        vertices, edges, malformed = parse_edge_list(file)
    if malformed:
        print(
            "Warning: skipped %d malformed line(s) in %s" % (malformed, file_path),
            file=sys.stderr,
        )

    # One vertex id per line.
    with open(vertex_file_path, "w") as file:
        file.writelines(vertex + "\n" for vertex in vertices)
    # One "src,dst" pair per line.
    with open(edge_file_path, "w") as file:
        file.writelines(edge[0] + "," + edge[1] + "\n" for edge in edges)
37+
38+
39+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
name: wiki # a directory named after this graph must exist under the ${data} directory
2+
store_type: mutable_csr # v6d, groot, gart
3+
schema:
4+
vertex_types:
5+
- type_name: article
6+
type_id: 0
7+
x_csr_params:
8+
max_vertex_num: 5000000
9+
properties:
10+
- property_id: 0
11+
property_name: id
12+
property_type:
13+
primitive_type: DT_SIGNED_INT64
14+
primary_keys:
15+
- id
16+
edge_types:
17+
- type_name: link
18+
type_id: 0
19+
vertex_type_pair_relations:
20+
- source_vertex: article
21+
destination_vertex: article
22+
relation: MANY_TO_MANY
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
graph: modern_graph
2+
loading_config:
3+
data_source:
4+
scheme: file # file, oss, s3, hdfs; only file is supported now
5+
import_option: init # append, overwrite, only init is supported now
6+
format:
7+
type: csv
8+
metadata:
9+
delimiter: " " # other loading configuration goes here
10+
header_row: false # whether to use the first row as the header
11+
quoting: false
12+
quote_char: '"'
13+
double_quote: true
14+
escape_char: '\'
15+
escaping: false
16+
block_size: 4MB
17+
batch_reader: true
18+
null_values: [""]
19+
20+
vertex_mappings:
21+
- type_name: article # must align with the schema
22+
inputs:
23+
- article.csv
24+
column_mappings:
25+
- column:
26+
index: 0 # can be omitted if the index is the same as the property index
27+
property: id
28+
edge_mappings:
29+
- type_triplet:
30+
edge: link
31+
source_vertex: article
32+
destination_vertex: article
33+
inputs:
34+
- link.csv
35+
source_vertex_mappings:
36+
- column:
37+
index: 0
38+
destination_vertex_mappings:
39+
- column:
40+
index: 1

flex/tests/leiden/test.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ int main(int argc, char** argv) {
7575
LOG(INFO) << "Graph created, vcount: " << igraph_vcount(&g)
7676
<< ", ecount: " << igraph_ecount(&g);
7777

78+
double t = -grape::GetCurrentTime();
79+
7880
Graph graph(&g);
7981

8082
CPMVertexPartition part(&graph, 0.5 /* resolution */);
@@ -91,4 +93,7 @@ int main(int argc, char** argv) {
9193
}
9294

9395
igraph_destroy(&g);
96+
97+
t += grape::GetCurrentTime();
98+
LOG(INFO) << "Time: " << t << "s";
9499
}

0 commit comments

Comments
 (0)