File tree Expand file tree Collapse file tree 9 files changed +269
-0
lines changed
Expand file tree Collapse file tree 9 files changed +269
-0
lines changed Original file line number Diff line number Diff line change 1+ name : wiki # a wiki dir must then exist under the ${data} directory
2+ store_type : mutable_csr # v6d, groot, gart
3+ schema :
4+ vertex_types :
5+ - type_name : user
6+ type_id : 0
7+ x_csr_params :
8+ max_vertex_num : 5000000
9+ properties :
10+ - property_id : 0
11+ property_name : id
12+ property_type :
13+ primitive_type : DT_SIGNED_INT64
14+ primary_keys :
15+ - id
16+ edge_types :
17+ - type_name : friend
18+ type_id : 0
19+ vertex_type_pair_relations :
20+ - source_vertex : user
21+ destination_vertex : user
22+ relation : MANY_TO_MANY
Original file line number Diff line number Diff line change 1+ graph : modern_graph
2+ loading_config :
3+ data_source :
4+ scheme : file # file, oss, s3, hdfs; only file is supported now
5+ import_option : init # append, overwrite, only init is supported now
6+ format :
7+ type : csv
8+ metadata :
9+ delimiter : " ," # other loading configuration places here
10+ header_row : false # whether to use the first row as the header
11+ quoting : false
12+ quote_char : ' "'
13+ double_quote : true
14+ escape_char : ' \'
15+ escaping : false
16+ block_size : 4MB
17+ batch_reader : true
18+ null_values : [""]
19+
20+ vertex_mappings :
21+ - type_name : user # must align with the schema
22+ inputs :
23+ - vertices.csv
24+ column_mappings :
25+ - column :
26+ index : 0 # can be omitted if the index is the same as the property index
27+ property : id
28+ edge_mappings :
29+ - type_triplet :
30+ edge : friend
31+ source_vertex : user
32+ destination_vertex : user
33+ inputs :
34+ - edges.csv
35+ source_vertex_mappings :
36+ - column :
37+ index : 0
38+ destination_vertex_mappings :
39+ - column :
40+ index : 1
Original file line number Diff line number Diff line change 1+ #!/bin/python3
2+
3+ import os
4+ import sys
5+
def preprocess(file_path, vertex_file_path, edge_file_path):
    """Convert a whitespace-separated edge list into vertex and edge CSV files.

    Each input line holding exactly two whitespace-separated tokens is
    treated as one edge ``<source> <destination>``. Comment lines (first
    token starting with ``#``), blank lines, and lines with any other
    number of tokens are ignored.

    The vertex file receives every distinct endpoint, one per line, in
    sorted (lexicographic) order so that repeated runs produce identical
    output. The edge file receives one ``source,destination`` line per
    edge, in input order; duplicate edges are kept.

    Args:
        file_path: Path of the raw edge-list file to read.
        vertex_file_path: Path of the vertex CSV file to write.
        edge_file_path: Path of the edge CSV file to write.

    Returns:
        A ``(vertex_count, edge_count)`` tuple describing what was written.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
    """
    vertices = set()
    edges = []
    with open(file_path, "r") as source:
        for line in source:
            parts = line.split()
            # Checking the first token (instead of the raw line) also skips
            # comment lines that are preceded by whitespace.
            if not parts or parts[0].startswith("#"):
                continue
            # Only well-formed "<src> <dst>" lines describe an edge.
            if len(parts) == 2:
                vertices.update(parts)
                edges.append(parts)

    # The input is read completely before any output is opened, so an
    # output path may safely point at the input file.
    with open(vertex_file_path, "w") as vertex_out:
        # Sets iterate in an arbitrary, run-dependent order; sort for
        # reproducible output.
        vertex_out.writelines(vertex + "\n" for vertex in sorted(vertices))
    with open(edge_file_path, "w") as edge_out:
        edge_out.writelines(",".join(edge) + "\n" for edge in edges)
    return len(vertices), len(edges)


def main(argv):
    """Run the preprocessing step from a command-line argument vector.

    Args:
        argv: Full argument vector including the program name, i.e.
            ``[prog, <file>, <vertex_file>, <edge_file>]``.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 4:
        # Diagnostics go to stderr so they never mix with piped output.
        print(
            "Usage: python3 preprocess.py <file> <vertex_file> <edge_file>",
            file=sys.stderr,
        )
        return 1
    preprocess(argv[1], argv[2], argv[3])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
37+
38+
39+
Original file line number Diff line number Diff line change 1+ name : wiki # a wiki dir must then exist under the ${data} directory
2+ store_type : mutable_csr # v6d, groot, gart
3+ schema :
4+ vertex_types :
5+ - type_name : user
6+ type_id : 0
7+ x_csr_params :
8+ max_vertex_num : 5000000
9+ properties :
10+ - property_id : 0
11+ property_name : id
12+ property_type :
13+ primitive_type : DT_SIGNED_INT64
14+ primary_keys :
15+ - id
16+ edge_types :
17+ - type_name : friend
18+ type_id : 0
19+ vertex_type_pair_relations :
20+ - source_vertex : user
21+ destination_vertex : user
22+ relation : MANY_TO_MANY
Original file line number Diff line number Diff line change 1+ graph : modern_graph
2+ loading_config :
3+ data_source :
4+ scheme : file # file, oss, s3, hdfs; only file is supported now
5+ import_option : init # append, overwrite, only init is supported now
6+ format :
7+ type : csv
8+ metadata :
9+ delimiter : " ," # other loading configuration places here
10+ header_row : false # whether to use the first row as the header
11+ quoting : false
12+ quote_char : ' "'
13+ double_quote : true
14+ escape_char : ' \'
15+ escaping : false
16+ block_size : 4MB
17+ batch_reader : true
18+ null_values : [""]
19+
20+ vertex_mappings :
21+ - type_name : user # must align with the schema
22+ inputs :
23+ - vertices.csv
24+ column_mappings :
25+ - column :
26+ index : 0 # can be omitted if the index is the same as the property index
27+ property : id
28+ edge_mappings :
29+ - type_triplet :
30+ edge : friend
31+ source_vertex : user
32+ destination_vertex : user
33+ inputs :
34+ - edges.csv
35+ source_vertex_mappings :
36+ - column :
37+ index : 0
38+ destination_vertex_mappings :
39+ - column :
40+ index : 1
Original file line number Diff line number Diff line change 1+ #!/bin/python3
2+
3+ import os
4+ import sys
5+
def preprocess(file_path, vertex_file_path, edge_file_path):
    """Convert a whitespace-separated edge list into vertex and edge CSV files.

    Each input line holding exactly two whitespace-separated tokens is
    treated as one edge ``<source> <destination>``. Comment lines (first
    token starting with ``#``), blank lines, and lines with any other
    number of tokens are ignored.

    The vertex file receives every distinct endpoint, one per line, in
    sorted (lexicographic) order so that repeated runs produce identical
    output. The edge file receives one ``source,destination`` line per
    edge, in input order; duplicate edges are kept.

    Args:
        file_path: Path of the raw edge-list file to read.
        vertex_file_path: Path of the vertex CSV file to write.
        edge_file_path: Path of the edge CSV file to write.

    Returns:
        A ``(vertex_count, edge_count)`` tuple describing what was written.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
    """
    vertices = set()
    edges = []
    with open(file_path, "r") as source:
        for line in source:
            parts = line.split()
            # Checking the first token (instead of the raw line) also skips
            # comment lines that are preceded by whitespace.
            if not parts or parts[0].startswith("#"):
                continue
            # Only well-formed "<src> <dst>" lines describe an edge.
            if len(parts) == 2:
                vertices.update(parts)
                edges.append(parts)

    # The input is read completely before any output is opened, so an
    # output path may safely point at the input file.
    with open(vertex_file_path, "w") as vertex_out:
        # Sets iterate in an arbitrary, run-dependent order; sort for
        # reproducible output.
        vertex_out.writelines(vertex + "\n" for vertex in sorted(vertices))
    with open(edge_file_path, "w") as edge_out:
        edge_out.writelines(",".join(edge) + "\n" for edge in edges)
    return len(vertices), len(edges)


def main(argv):
    """Run the preprocessing step from a command-line argument vector.

    Args:
        argv: Full argument vector including the program name, i.e.
            ``[prog, <file>, <vertex_file>, <edge_file>]``.

    Returns:
        The process exit status: 0 on success, 1 on a usage error.
    """
    if len(argv) != 4:
        # Diagnostics go to stderr so they never mix with piped output.
        print(
            "Usage: python3 preprocess.py <file> <vertex_file> <edge_file>",
            file=sys.stderr,
        )
        return 1
    preprocess(argv[1], argv[2], argv[3])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
37+
38+
39+
Original file line number Diff line number Diff line change 1+ name : wiki # a wiki dir must then exist under the ${data} directory
2+ store_type : mutable_csr # v6d, groot, gart
3+ schema :
4+ vertex_types :
5+ - type_name : article
6+ type_id : 0
7+ x_csr_params :
8+ max_vertex_num : 5000000
9+ properties :
10+ - property_id : 0
11+ property_name : id
12+ property_type :
13+ primitive_type : DT_SIGNED_INT64
14+ primary_keys :
15+ - id
16+ edge_types :
17+ - type_name : link
18+ type_id : 0
19+ vertex_type_pair_relations :
20+ - source_vertex : article
21+ destination_vertex : article
22+ relation : MANY_TO_MANY
Original file line number Diff line number Diff line change 1+ graph : modern_graph
2+ loading_config :
3+ data_source :
4+ scheme : file # file, oss, s3, hdfs; only file is supported now
5+ import_option : init # append, overwrite, only init is supported now
6+ format :
7+ type : csv
8+ metadata :
9+ delimiter : " " # other loading configuration places here
10+ header_row : false # whether to use the first row as the header
11+ quoting : false
12+ quote_char : ' "'
13+ double_quote : true
14+ escape_char : ' \'
15+ escaping : false
16+ block_size : 4MB
17+ batch_reader : true
18+ null_values : [""]
19+
20+ vertex_mappings :
21+ - type_name : article # must align with the schema
22+ inputs :
23+ - article.csv
24+ column_mappings :
25+ - column :
26+ index : 0 # can be omitted if the index is the same as the property index
27+ property : id
28+ edge_mappings :
29+ - type_triplet :
30+ edge : link
31+ source_vertex : article
32+ destination_vertex : article
33+ inputs :
34+ - link.csv
35+ source_vertex_mappings :
36+ - column :
37+ index : 0
38+ destination_vertex_mappings :
39+ - column :
40+ index : 1
Original file line number Diff line number Diff line change @@ -75,6 +75,8 @@ int main(int argc, char** argv) {
7575 LOG (INFO) << " Graph created, vcount: " << igraph_vcount (&g)
7676 << " , ecount: " << igraph_ecount (&g);
7777
78+ double t = -grape::GetCurrentTime ();
79+
7880 Graph graph (&g);
7981
8082 CPMVertexPartition part (&graph, 0.5 /* resolution */ );
@@ -91,4 +93,7 @@ int main(int argc, char** argv) {
9193 }
9294
9395 igraph_destroy (&g);
96+
97+ t += grape::GetCurrentTime ();
98+ LOG (INFO) << " Time: " << t << " s" ;
9499}
You can’t perform that action at this time.
0 commit comments