import concurrent
import multiprocessing
+from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, Future
from contextlib import suppress
from logging import getLogger
from sqlalchemy.engine import Engine

from cloud2sql.show_progress import CollectInfo
-from cloud2sql.sql import SqlModel, SqlUpdater
+from cloud2sql.sql import SqlUpdater, sql_updater

-log = getLogger("cloud2sql")
+log = getLogger("resoto.cloud2sql")


def collectors(raw_config: Json, feedback: CoreFeedback) -> Dict[str, BaseCollectorPlugin]:
@@ -62,24 +63,25 @@ def collect(collector: BaseCollectorPlugin, engine: Engine, feedback: CoreFeedba
    collector.collect()
    # read the kinds created from this collector
    kinds = [from_json(m, Kind) for m in collector.graph.export_model(walk_subclasses=False)]
-    model = SqlModel(Model({k.fqn: k for k in kinds}))
+    updater = sql_updater(Model({k.fqn: k for k in kinds}), engine)
    node_edge_count = len(collector.graph.nodes) + len(collector.graph.edges)
-    ne_count = iter(range(0, node_edge_count))
-    progress_update = max(node_edge_count // 100, 50)
+    ne_count = 0
    schema = f"create temp tables {engine.dialect.name}"
    syncdb = f"synchronize {engine.dialect.name}"
    feedback.progress_done(schema, 0, 1, context=[collector.cloud])
    feedback.progress_done(syncdb, 0, node_edge_count, context=[collector.cloud])
    with engine.connect() as conn:
        with conn.begin():
            # create the ddl metadata from the kinds
-            model.create_schema(conn, args)
+            updater.create_schema(conn, args)
            feedback.progress_done(schema, 1, 1, context=[collector.cloud])
-            # ingest the data
-            updater = SqlUpdater(model)
+
+            # group all nodes by kind
+            nodes_by_kind = defaultdict(list)
            node: BaseResource
            for node in collector.graph.nodes:
                node._graph = collector.graph
+                # create an exported node with the same scheme as resotocore
                exported = node_to_dict(node)
                exported["type"] = "node"
                exported["ancestors"] = {
@@ -88,17 +90,29 @@ def collect(collector: BaseCollectorPlugin, engine: Engine, feedback: CoreFeedba
                    "region": {"reported": {"id": node.region().name}},
                    "zone": {"reported": {"id": node.zone().name}},
                }
-                stmt = updater.insert_node(exported)
-                if stmt is not None:
-                    conn.execute(stmt)
-                if (nx := next(ne_count)) % progress_update == 0:
-                    feedback.progress_done(syncdb, nx, node_edge_count, context=[collector.cloud])
+                nodes_by_kind[node.kind].append(exported)
+
+            # insert batches of nodes by kind
+            for kind, nodes in nodes_by_kind.items():
+                log.info(f"Inserting {len(nodes)} nodes of kind {kind}")
+                for insert in updater.insert_nodes(kind, nodes):
+                    conn.execute(insert)
+                ne_count += len(nodes)
+                feedback.progress_done(syncdb, ne_count, node_edge_count, context=[collector.cloud])
+
+            # group all edges by the kind of their from/to nodes
+            edges_by_kind = defaultdict(list)
            for from_node, to_node, _ in collector.graph.edges:
-                stmt = updater.insert_node({"from": from_node.chksum, "to": to_node.chksum, "type": "edge"})
-                if stmt is not None:
-                    conn.execute(stmt)
-                if (nx := next(ne_count)) % progress_update == 0:
-                    feedback.progress_done(syncdb, nx, node_edge_count, context=[collector.cloud])
+                edge_node = {"from": from_node.chksum, "to": to_node.chksum, "type": "edge"}
+                edges_by_kind[(from_node.kind, to_node.kind)].append(edge_node)
+
+            # insert batches of edges by from/to kind
+            for from_to, nodes in edges_by_kind.items():
+                log.info(f"Inserting {len(nodes)} edges from {from_to[0]} to {from_to[1]}")
+                for insert in updater.insert_edges(from_to, nodes):
+                    conn.execute(insert)
+                ne_count += len(nodes)
+                feedback.progress_done(syncdb, ne_count, node_edge_count, context=[collector.cloud])
    feedback.progress_done(collector.cloud, 1, 1)


@@ -131,7 +145,10 @@ def collect_from_plugins(engine: Engine, args: Namespace) -> None:
                for future in concurrent.futures.as_completed(futures):
                    future.result()
            # when all collectors are done, we can swap all temp tables
-            SqlModel.swap_temp_tables(engine)
+            swap_tables = "Make latest snapshot available"
+            feedback.progress_done(swap_tables, 0, 1)
+            SqlUpdater.swap_temp_tables(engine)
+            feedback.progress_done(swap_tables, 1, 1)
        except Exception as e:
            # set end and wait for live to finish, otherwise the cursor is not reset
            end.set()
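Note: the core of this change is a group-then-batch pattern. Nodes are bucketed by kind and edges by the (from_kind, to_kind) pair, so every target table receives a few bulk statements instead of one statement per graph element. Below is a minimal, self-contained sketch of that grouping step. It uses plain dicts in place of BaseResource objects; the field names kind and chksum mirror the diff above, while the helper name group_for_batch_insert and everything else is illustrative only, not part of cloud2sql's API.

from collections import defaultdict
from typing import Any, Dict, List, Tuple

Json = Dict[str, Any]


def group_for_batch_insert(
    nodes: List[Json], edges: List[Tuple[Json, Json]]
) -> Tuple[Dict[str, List[Json]], Dict[Tuple[str, str], List[Json]]]:
    # one bucket per node kind: each kind table later receives a few bulk inserts
    nodes_by_kind: Dict[str, List[Json]] = defaultdict(list)
    for node in nodes:
        nodes_by_kind[node["kind"]].append(node)
    # one bucket per (from_kind, to_kind) pair: one link table per pair
    edges_by_kind: Dict[Tuple[str, str], List[Json]] = defaultdict(list)
    for from_node, to_node in edges:
        edge = {"from": from_node["chksum"], "to": to_node["chksum"], "type": "edge"}
        edges_by_kind[(from_node["kind"], to_node["kind"])].append(edge)
    return nodes_by_kind, edges_by_kind


if __name__ == "__main__":
    ns = [{"kind": "aws_ec2_instance", "chksum": "a"}, {"kind": "aws_ec2_instance", "chksum": "b"}]
    by_kind, by_edge_kind = group_for_batch_insert(ns, [(ns[0], ns[1])])
    print({k: len(v) for k, v in by_kind.items()})       # {'aws_ec2_instance': 2}
    print({k: len(v) for k, v in by_edge_kind.items()})  # {('aws_ec2_instance', 'aws_ec2_instance'): 1}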
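The statements yielded by insert_nodes/insert_edges are executed one by one inside the open transaction. A plausible way to produce them is SQLAlchemy's multi-row insert, chunked so a single statement stays bounded. The sketch below is a hedged illustration under that assumption: the hand-written per-kind table and the chunk size are mine, not taken from the diff, and cloud2sql actually derives its tables from the resoto model rather than declaring them by hand.

from typing import Any, Dict, Iterator, List

from sqlalchemy import Column, MetaData, String, Table, create_engine
from sqlalchemy.sql.dml import Insert

metadata = MetaData()
# hypothetical temp table for one kind; the real schema comes from the collected model
instances = Table(
    "tmp_aws_ec2_instance",
    metadata,
    Column("_id", String, primary_key=True),
    Column("name", String),
)


def insert_in_batches(table: Table, rows: List[Dict[str, Any]], chunk_size: int = 1000) -> Iterator[Insert]:
    # yield one multi-row INSERT per chunk instead of one statement per row
    for i in range(0, len(rows), chunk_size):
        yield table.insert().values(rows[i : i + chunk_size])


if __name__ == "__main__":
    engine = create_engine("sqlite:///:memory:")
    metadata.create_all(engine)
    rows = [{"_id": str(n), "name": f"instance-{n}"} for n in range(2500)]
    with engine.connect() as conn:
        with conn.begin():
            # smaller chunks keep the bound-parameter count below older SQLite limits
            for stmt in insert_in_batches(instances, rows, chunk_size=300):
                conn.execute(stmt)

Batching this way trades a little memory for far fewer round trips, which is also why the progress counter in the loops above advances in whole-kind increments rather than per row.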