22from typing import List , Generator , Any
33import queue
44import time
5+ import structlog
56from enum import Enum
67from cloudquery .sdk .schema import Table , Resource
78from cloudquery .sdk .message import SyncMessage , SyncInsertMessage , SyncMigrateTableMessage
1213
# NOTE(review): not referenced in the visible part of this file; presumably the
# number of queued work items allowed per worker thread -- confirm at the use site.
QUEUE_PER_WORKER = 100
1415
16+
class ThreadPoolExecutorWithQueueSizeLimit(futures.ThreadPoolExecutor):
    """Thread pool whose backlog of pending work items is bounded.

    Args:
        maxsize: Capacity passed to ``queue.Queue``; per the ``queue`` module,
            a value of zero or less means the queue is unbounded.
        *args, **kwargs: Forwarded unchanged to ``ThreadPoolExecutor``.
    """

    def __init__(self, maxsize, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Swap the executor's private work queue for a bounded one.  This relies
        # on a CPython implementation detail (``submit`` enqueues with
        # ``_work_queue.put``), so a full queue makes ``submit`` block instead
        # of letting the backlog grow without limit.
        self._work_queue = queue.Queue(maxsize=maxsize)
1921
2022
class TableResolverStarted:
    """Bookkeeping message: ``count`` more table resolvers have been scheduled.

    Instances are put on the results queue; the consumer loop in
    ``Scheduler.sync`` adds ``count`` to the number of resolvers it expects to
    see finish.
    """

    def __init__(self, count: int = 1) -> None:
        self._count = count

    def __repr__(self) -> str:
        return f"{type(self).__name__}(count={self._count!r})"

    @property
    def count(self) -> int:
        """Number of table resolvers this message accounts for."""
        return self._count
2830
2931
class TableResolverFinished:
    """Bookkeeping message: one table resolver has finished (with or without error).

    Carries no data.  The consumer loop in ``Scheduler.sync`` counts these and
    stops once as many resolvers have finished as were announced as started.
    """

    def __init__(self) -> None:
        pass
3335
3436
3537class Scheduler :
36- def __init__ (self , concurrency : int , queue_size : int = 0 , max_depth : int = 3 ):
38+ def __init__ (self , concurrency : int , queue_size : int = 0 , max_depth : int = 3 , logger = None ):
3739 self ._queue = queue .Queue ()
3840 self ._max_depth = max_depth
41+ if logger is None :
42+ self ._logger = structlog .get_logger ()
3943 if concurrency <= 0 :
4044 raise ValueError ("concurrency must be greater than 0" )
4145 if max_depth <= 0 :
@@ -49,34 +53,53 @@ def __init__(self, concurrency: int, queue_size: int = 0, max_depth : int = 3):
4953 current_depth_concurrency = current_depth_concurrency // 2 if current_depth_concurrency > 1 else 1
5054 current_depth_queue_size = current_depth_queue_size // 2 if current_depth_queue_size > 1 else 1
5155
56+ def shutdown (self ):
57+ for pool in self ._pools :
58+ pool .shutdown ()
59+
5260 def resolve_resource (self , resolver : TableResolver , client , parent : Resource , item : Any ) -> Resource :
53- resource = Resource (resolver .table , None , item )
61+ resource = Resource (resolver .table , parent , item )
5462 resolver .pre_resource_resolve (client , resource )
5563 for column in resolver .table .columns :
5664 resolver .resolve_column (client , resource , column .name )
5765 resolver .post_resource_resolve (client , resource )
5866 return resource
5967
60- def resolve_table (self , resolver : TableResolver , client , parent_item : Any , res : queue .Queue ):
68+ def resolve_table (self , resolver : TableResolver , depth : int , client , parent_item : Resource , res : queue .Queue ):
69+ table_resolvers_started = 0
6170 try :
71+ if depth == 0 :
72+ self ._logger .info ("table resolver started" , table = resolver .table .name , depth = depth )
73+ else :
74+ self ._logger .debug ("table resolver started" , table = resolver .table .name , depth = depth )
75+ total_resources = 0
6276 for item in resolver .resolve (client , parent_item ):
6377 resource = self .resolve_resource (resolver , client , parent_item , item )
6478 res .put (SyncInsertMessage (resource .to_arrow_record ()))
79+ for child_resolvers in resolver .child_resolvers :
80+ self ._pools [depth + 1 ].submit (self .resolve_table , child_resolvers , depth + 1 , client , resource , res )
81+ table_resolvers_started += 1
82+ total_resources += 1
83+ if depth == 0 :
84+ self ._logger .info ("table resolver finished successfully" , table = resolver .table .name , depth = depth )
85+ else :
86+ self ._logger .debug ("table resolver finished successfully" , table = resolver .table .name , depth = depth )
6587 except Exception as e :
66- traceback .print_exc ()
67- print ("exception" )
68- print (e )
88+ self ._logger .error ("table resolver finished with error" , table = resolver .table .name , depth = depth , exception = e )
6989 finally :
70- res .put (TableResolverStatus ())
90+ res .put (TableResolverStarted (count = table_resolvers_started ))
91+ res .put (TableResolverFinished ())
7192
7293 def _sync (self , client , resolvers : List [TableResolver ], res : queue .Queue , deterministic_cq_id = False ):
7394 total_table_resolvers = 0
74- for resolver in resolvers :
75- clients = resolver .multiplex (client )
76- for client in clients :
77- self ._pools [0 ].submit (self .resolve_table , resolver , client , None , res )
78- total_table_resolvers += 1
79- res .put (WorkerStatus (total_table_resolvers ))
95+ try :
96+ for resolver in resolvers :
97+ clients = resolver .multiplex (client )
98+ for client in clients :
99+ self ._pools [0 ].submit (self .resolve_table , resolver , 0 , client , None , res )
100+ total_table_resolvers += 1
101+ finally :
102+ res .put (TableResolverStarted (total_table_resolvers ))
80103
81104 def sync (self , client , resolvers : List [TableResolver ], deterministic_cq_id = False ) -> Generator [SyncMessage , None , None ]:
82105 res = queue .Queue ()
@@ -88,12 +111,12 @@ def sync(self, client, resolvers: List[TableResolver], deterministic_cq_id=False
88111 finished_table_resovlers = 0
89112 while True :
90113 message = res .get ()
91- if type (message ) == WorkerStatus :
92- total_table_resolvers += message .total_table_resolvers
114+ if type (message ) == TableResolverStarted :
115+ total_table_resolvers += message .count
93116 if total_table_resolvers == finished_table_resovlers :
94117 break
95118 continue
96- elif type (message ) == TableResolverStatus :
119+ elif type (message ) == TableResolverFinished :
97120 finished_table_resovlers += 1
98121 if total_table_resolvers == finished_table_resovlers :
99122 break
0 commit comments