@@ -31,7 +31,9 @@ def get_clickhouse_client(
3131 host : str , user : str , password : str
3232) -> clickhouse_connect .driver .client .Client :
3333 # for local testing only, disable SSL verification
34- # return clickhouse_connect.get_client(host=host, user=user, password=password, secure=True, verify=False)
34+ return clickhouse_connect .get_client (
35+ host = host , user = user , password = password , secure = True , verify = False
36+ )
3537
3638 return clickhouse_connect .get_client (
3739 host = host , user = user , password = password , secure = True
@@ -98,7 +100,7 @@ def write_to_file(data: Any, filename="", path=""):
98100# --------- Github Config File Methods Start----
99101class LazyFileHistory :
100102 """
101- Reads the content of a file from a GitHub repository on the version that it was on a specific time and date provided. It then caches the commits and file contents avoiding unnecessary requests to the GitHub API.
103+ Reads the content of a file from a GitHub repository on the version that it was closest to a specific time and date provided. It then caches the commits and file contents avoiding unnecessary requests to the GitHub API.
102104 All public methods are thread-safe.
103105 """
104106
@@ -110,7 +112,9 @@ def __init__(self, repo: Any, path: str) -> None:
110112 self ._fetched_all_commits = False
111113 self ._lock = threading .RLock ()
112114
113- def get_version_after_timestamp (self , timestamp : str | datetime ) -> Optional [str ]:
115+ def get_version_close_to_timestamp (
116+ self , timestamp : str | datetime
117+ ) -> Optional [str ]:
114118 try :
115119 with self ._lock :
116120 if not isinstance (timestamp , datetime ):
@@ -120,31 +124,34 @@ def get_version_after_timestamp(self, timestamp: str | datetime) -> Optional[str
120124 )
121125 else :
122126 timestamp = parse (timestamp )
123- commit = self ._find_earliest_after_in_cache (timestamp )
127+
128+ info (
129+ f" [LazyFileHistory] Try fetch cached content for { self .repo } : { self .path } at { timestamp .isoformat ()} "
130+ )
131+ commit = self ._find_closest_before_or_equal_in_cache (timestamp )
124132 if commit :
125133 return self ._fetch_content_for_commit (commit )
126134
127135 if not self ._fetched_all_commits :
136+ info (
137+ f" [LazyFileHistory] Nothing found in cache, fetching all commit includes { self .repo } /{ self .path } close/equal to { timestamp .isoformat ()} "
138+ )
128139 commit = self ._fetch_until_timestamp (timestamp )
129140 if commit :
130141 return self ._fetch_content_for_commit (commit )
142+ info (
143+ f" [LazyFileHistory] No config file found in cache and in commits for { self .repo } /{ self .path } ."
144+ )
145+ return None
131146 except Exception as e :
132147 warning (
133- f"Error fetching content for { self .repo } : { self .path } at { timestamp } : { e } "
148+ f" [LazyFileHistory] Error fetching content for { self .repo } : { self .path } at { timestamp } : { e } "
134149 )
135-
136- return None
137-
138- def _find_earliest_after_in_cache (self , timestamp : datetime ) -> Optional [str ]:
139- commits_after = [
140- c for c in self ._commits_cache if c .commit .author .date > timestamp
141- ]
142-
143- if not commits_after :
144150 return None
145- return commits_after [- 1 ]
146151
147152 def _fetch_until_timestamp (self , timestamp : datetime ) -> Optional [str ]:
153+ # call github api, with path parameter
154+ # Only commits containing this file path will be returned.
148155 all_commits = self .repo .get_commits (path = self .path )
149156 known_shas = {c .sha for c in self ._commits_cache }
150157
@@ -158,13 +165,29 @@ def _fetch_until_timestamp(self, timestamp: datetime) -> Optional[str]:
158165 if commit .commit .author .date <= timestamp :
159166 break
160167
168+ info (f" [LazyFileHistory] Fetched new commits { len (newly_fetched )} " )
169+ if len (newly_fetched ) > 0 :
170+ newly_fetched .sort (key = lambda c : c .commit .author .date )
171+ info (
172+ f" [LazyFileHistory] Fetched new commits with latest: { newly_fetched [- 1 ].commit .author .date } , oldest:{ newly_fetched [- 1 ].commit .author .date } "
173+ )
174+
161175 self ._commits_cache .extend (newly_fetched )
162- self ._commits_cache .sort (key = lambda c : c .commit .author .date , reverse = True )
176+ self ._commits_cache .sort (key = lambda c : c .commit .author .date )
163177
164178 if not newly_fetched :
165179 self ._fetched_all_commits = True
166180
167- return self ._find_earliest_after_in_cache (timestamp )
181+ return self ._find_closest_before_or_equal_in_cache (timestamp )
182+
183+ def _find_closest_before_or_equal_in_cache (
184+ self , timestamp : datetime
185+ ) -> Optional [str ]:
186+ commits_before_equal = [
187+ c for c in self ._commits_cache if c .commit .author .date <= timestamp
188+ ]
189+ commits_before_equal .sort (key = lambda c : c .commit .author .date )
190+ return commits_before_equal [- 1 ] if commits_before_equal else None
168191
169192 def _fetch_content_for_commit (self , commit : Any ) -> str :
170193 if commit .sha not in self ._content_cache :
@@ -286,7 +309,7 @@ def create_runner_labels(
286309 )
287310
288311 if len (all_runners_configs .keys ()) == 0 :
289- warning (
312+ raise ValueError (
290313 " [method: create_runner_labels] No runner labels found in the github config files, something is wrong, please investigate."
291314 )
292315
@@ -324,7 +347,7 @@ def create_runner_labels(
324347def get_runner_config (
325348 retriever : LazyFileHistory , start_time : str | datetime
326349) -> Dict [str , Dict [str , Any ]]:
327- contents = retriever .get_version_after_timestamp (start_time )
350+ contents = retriever .get_version_close_to_timestamp (start_time )
328351 if contents :
329352 return explode_runner_variants (yaml .safe_load (contents ))
330353 return {"runner_types" : {}}
@@ -590,7 +613,6 @@ def _add_runner_labels(
590613 old_lf_lf_runner_config_retriever ,
591614 ) -> None :
592615 # create dictionary of tags with set of targeting machine types
593-
594616 lf_runner_config = get_runner_config (lf_runner_config_retriever , start_time )
595617 if not lf_runner_config or not lf_runner_config ["runner_types" ]:
596618 lf_runner_config = get_runner_config (
@@ -1170,7 +1192,9 @@ def get_latest_queue_time_histogram_table(
11701192 query = """
11711193 SELECT toUnixTimestamp(MAX(time)) as latest FROM fortesting.oss_ci_queue_time_histogram
11721194 """
1173- info (" Getting lastest timestamp from misc.oss_ci_queue_time_histogram...." )
1195+ info (
1196+ " Getting lastest timestamp from fortesting.oss_ci_queue_time_histogram...."
1197+ )
11741198 res = cc .query (query , {})
11751199
11761200 if res .row_count != 1 :
0 commit comments