@@ -37,6 +37,8 @@ class ClusterInfo:
3737 cluster_id : str
3838 success : int
3939 failures : str
40+ spark_version : str | None = None
41+ policy_id : str | None = None
4042 cluster_name : str | None = None
4143 creator : str | None = None
4244
@@ -156,6 +158,8 @@ def _assess_clusters(self, all_clusters):
156158 cluster_info = ClusterInfo (
157159 cluster_id = cluster .cluster_id if cluster .cluster_id else "" ,
158160 cluster_name = cluster .cluster_name ,
161+ policy_id = cluster .policy_id ,
162+ spark_version = cluster .spark_version ,
159163 creator = cluster .creator_user_name ,
160164 success = 1 ,
161165 failures = "[]" ,
@@ -172,3 +176,58 @@ def snapshot(self) -> Iterable[ClusterInfo]:
def _try_fetch(self) -> Iterable[ClusterInfo]:
    """Yield one ``ClusterInfo`` per row stored in this crawler's table.

    Every row returned by ``self._fetch`` is unpacked positionally into the
    ``ClusterInfo`` constructor, so the column order of the table has to
    line up with the field order of the dataclass.
    """
    query = f"SELECT * FROM {self._schema}.{self._table}"
    yield from (ClusterInfo(*record) for record in self._fetch(query))
179+
180+
181+ @dataclass
182+ class PolicyInfo :
183+ policy_id : str
184+ policy_name : str
185+ success : int
186+ failures : str
187+ spark_version : str | None = None
188+ policy_description : str | None = None
189+ creator : str | None = None
190+
191+
class PoliciesCrawler(CrawlerBase[PolicyInfo], CheckClusterMixin):
    """Crawl the workspace's cluster policies into ``PolicyInfo`` records.

    Records are kept in the ``policies`` table of the given schema in the
    ``hive_metastore`` catalog.
    """

    def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema):
        """Bind the crawler to a workspace client, SQL backend and schema.

        Args:
            ws: Workspace client used to list cluster policies.
            sbe: SQL backend handed to the base crawler.
            schema: Name of the schema that holds the ``policies`` table.
        """
        super().__init__(sbe, "hive_metastore", schema, "policies", PolicyInfo)
        self._ws = ws

    def _crawl(self) -> Iterable[PolicyInfo]:
        """List every cluster policy in the workspace and assess each one."""
        all_policies = list(self._ws.cluster_policies.list())
        return list(self._assess_policies(all_policies))

    @staticmethod
    def _extract_spark_version(definition) -> str | None:
        """Return the JSON-encoded ``spark_version`` entry of a policy definition.

        Returns ``None`` when the definition is missing or empty, is not
        valid JSON, is not a JSON object, or has no ``spark_version`` key.
        One unusable definition must not abort the crawl of all policies.
        """
        if not definition:
            return None
        try:
            parsed = json.loads(definition)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError; TypeError covers a
            # definition that is not str/bytes.
            return None
        if not isinstance(parsed, dict) or "spark_version" not in parsed:
            return None
        return json.dumps(parsed["spark_version"])

    def _assess_policies(self, all_policices):
        """Yield a ``PolicyInfo`` for every policy that has a ``policy_id``.

        Args:
            all_policices: Iterable of policy objects as returned by the
                workspace client's cluster-policy listing.

        Yields:
            One ``PolicyInfo`` per policy. ``success`` is 0 and ``failures``
            holds the JSON-encoded messages when the policy check reported
            anything; otherwise ``success`` is 1 and ``failures`` is "[]".
        """
        for policy in all_policices:
            if policy.policy_id is None:
                # Nothing to key the record on; skip the policy entirely.
                continue
            failures: list[str] = list(self._check_cluster_policy(policy.policy_id, "policy"))
            yield PolicyInfo(
                policy_id=policy.policy_id,
                policy_description=policy.description,
                policy_name=policy.name,
                spark_version=self._extract_spark_version(policy.definition),
                success=0 if failures else 1,
                failures=json.dumps(failures),
                creator=policy.creator_user_name,
            )

    def snapshot(self) -> Iterable[PolicyInfo]:
        """Return the policy records via the base crawler's ``_snapshot``.

        ``_try_fetch`` and ``_crawl`` are passed as the fetcher and the
        loader respectively.
        """
        return self._snapshot(self._try_fetch, self._crawl)

    def _try_fetch(self) -> Iterable[PolicyInfo]:
        """Yield one ``PolicyInfo`` per row stored in the policies table.

        Rows are unpacked positionally, so the table's column order has to
        match the field order of ``PolicyInfo``.
        """
        for row in self._fetch(f"SELECT * FROM {self._schema}.{self._table}"):
            yield PolicyInfo(*row)
0 commit comments