Skip to content

Commit 9492bbd

Browse files
authored
[ez] Remove old envs from test times json (#6961)
Please review #6964 first. It is not a functional dependency, but it does some file moves that this change follows too. Reason: the test times json is very large and contains build environments that don't exist anymore. Previously I left them in because there might be really old jobs that depend on the old build environments, and I thought the file would be easy to clean up manually, but it's just easier to do it automatically. Testing: ran the script before and after this change and saw that entries were only removed. Spot checked that the entries that got removed are not on the front page of HUD. Old vs. new diff (just shows that the only change is that things got deleted): <img width="47" height="944" alt="image" src="https://github.com/user-attachments/assets/f276ecc5-f656-4020-b25b-9e7ca51d4ecd" />
1 parent 03b8fcd commit 9492bbd

File tree

4 files changed

+56
-0
lines changed

4 files changed

+56
-0
lines changed

tools/torchci/tests/test_update_test_times.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import unittest
2+
from unittest.mock import patch
23

34
from torchci.update_test_times import gen_test_class_times, gen_test_file_times
45

56

7+
@patch("torchci.update_test_times.clean_up_test_times", lambda x: x)
68
class TestUpdateTestTimesFile(unittest.TestCase):
79
def make_db_row(self, job: str, config: str, file: str, time: float):
810
return {"base_name": job, "test_config": config, "file": file, "time": time}
@@ -98,6 +100,7 @@ def test_gen_test_file_times_old_values_still_present(self) -> None:
98100
self.assertDictEqual(res, expected)
99101

100102

103+
@patch("torchci.update_test_times.clean_up_test_times", lambda x: x)
101104
class TestUpdateTestTimesClass(unittest.TestCase):
102105
def make_db_row(
103106
self, job: str, config: str, file: str, classname: str, time: float
@@ -215,5 +218,35 @@ def test_gen_test_class_times_old_values_still_present(self) -> None:
215218
self.assertDictEqual(res, expected)
216219

217220

221+
class TestCleanUpOldBuildEnvs(unittest.TestCase):
    """Exercise the build-environment cleanup through gen_test_file_times."""

    def make_db_row(self, job: str, config: str, file: str, time: float):
        """Return a row dict with base_name/test_config/file/time keys."""
        row = {"base_name": job, "test_config": config, "file": file, "time": time}
        return row

    def test_clean_up_simple(self) -> None:
        # Three jobs report times, but the mocked job-names query only
        # returns job1, so job2 and job3 must be absent from the result.
        rows = [
            self.make_db_row(job, "config", "a", 1)
            for job in ("job1", "job2", "job3")
        ]
        recent_jobs = [{"base_name": "job1"}]

        with patch(
            "torchci.update_test_times.query_clickhouse_saved",
            return_value=recent_jobs,
        ):
            res = gen_test_file_times(rows, {})

        default_times = {
            "config": {"a": 1.0},
            "default": {"a": 1.0},
        }
        expected = {
            "job1": {"config": {"a": 1}},
            "default": default_times,
        }
        self.assertDictEqual(res, expected)
249+
250+
218251
if __name__ == "__main__":
219252
unittest.main()

tools/torchci/update_test_times.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
TEST_TIME_PER_FILE_PERIODIC_JOBS_QUERY_NAME = "test_times/per_file_periodic_jobs"
1313
TEST_TIME_PER_CLASS_QUERY_NAME = "test_times/per_class"
1414
TEST_TIME_PER_CLASS_PERIODIC_JOBS_QUERY_NAME = "test_times/per_class_periodic_jobs"
15+
JOB_NAMES_PAST_MONTH_QUERY_NAME = "test_times/job_names_past_month"
1516

1617

1718
def get_file_data_from_db():
@@ -77,6 +78,16 @@ def convert_test_class_times_to_default_dict(d):
7778
return new_d
7879

7980

81+
def clean_up_test_times(test_times: dict) -> None:
    """Remove build environments that no longer run jobs from ``test_times``.

    ``test_times`` is mutated in place: every top-level key that is neither
    ``"default"`` nor a ``base_name`` returned by the
    ``JOB_NAMES_PAST_MONTH_QUERY_NAME`` saved query is deleted.

    If the query returns no rows at all, nothing is removed. An empty result
    would otherwise delete every environment except ``"default"``, so it is
    treated as "no information" rather than "no jobs ran".

    Args:
        test_times: Mapping keyed by build environment name.
    """
    import warnings

    job_names_past_month = query_clickhouse_saved(JOB_NAMES_PAST_MONTH_QUERY_NAME, {})
    job_names = {row["base_name"] for row in job_names_past_month}
    if not job_names:
        warnings.warn(
            "No job names returned for the past month; "
            "skipping clean up of old build environments",
            stacklevel=2,
        )
        return

    # "default" is the fallback entry, not a real job name, so always keep it.
    job_names.add("default")

    # Iterate over a snapshot of the keys because entries are deleted in the loop.
    for env in list(test_times):
        if env not in job_names:
            del test_times[env]
89+
90+
8091
def gen_test_file_times(db_results, old_test_times):
8192
# Use old test times because sometimes we want to manually edit the test
8293
# times json and want those changes to persist. Unfortunately this means
@@ -108,6 +119,8 @@ def gen_test_file_times(db_results, old_test_times):
108119
# usually in the default test config like distributed
109120
test_times["default"]["default"] = test_times_no_test_config
110121

122+
clean_up_test_times(test_times)
123+
111124
return test_times
112125

113126

@@ -153,6 +166,8 @@ def gen_test_class_times(db_results, old_test_times):
153166
# usually in the default test config like distributed
154167
test_times["default"]["default"] = test_times_no_test_config
155168

169+
clean_up_test_times(test_times)
170+
156171
return test_times
157172

158173

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"params": {},
3+
"tests": [{}]
4+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
-- Distinct base names of workflow jobs created within the last month.
-- base_name is the first regex capture group of the job name: the text
-- before a " /" separator (greedy, so up to the last " /" in the name).
SELECT DISTINCT REGEXP_EXTRACT(j.name, '^(.*) /', 1) AS base_name
2+
FROM
3+
default.workflow_job j
4+
WHERE j.created_at > now() - INTERVAL 1 MONTH

0 commit comments

Comments
 (0)