Skip to content

Commit 3170781

Browse files
committed
Rework timeseries report to iterate git history only once
1 parent: 1ec1e1c · commit: 3170781

File tree

1 file changed

+115
-67
lines changed

1 file changed

+115
-67
lines changed

misc/scripts/library-coverage/generate-timeseries.py

Lines changed: 115 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,6 @@ def get_packages(lang, query):
6262
shutil.rmtree(db)
6363

6464

65-
current_dir = os.getcwd()
6665
working_dir = ""
6766
if len(sys.argv) > 1:
6867
working_dir = sys.argv[1]
@@ -77,101 +76,150 @@ def get_packages(lang, query):
7776
"csharp", "C#", ".cs", "csharp/ql/src/meta/frameworks/Coverage.ql")
7877
]
7978

80-
# todo: change this when we cover multiple languages. We should compute the SHAs
81-
# only once and not per language
8279
output_prefix = "framework-coverage-timeseries-"
83-
for lang in settings.languages:
84-
os.chdir(current_dir)
85-
config = [c for c in configs if c.lang == lang][0]
86-
with open(output_prefix + config.lang + ".csv", 'w', newline='') as csvfile_total:
87-
with open(output_prefix + config.lang + "-packages.csv", 'w', newline='') as csvfile_packages:
88-
csvwriter_total = csv.writer(csvfile_total)
89-
csvwriter_packages = csv.writer(csvfile_packages)
90-
csvwriter_total.writerow(
91-
["SHA", "Date", "Sources", "Sinks", "Summaries"])
92-
csvwriter_packages.writerow(
93-
["SHA", "Date", "Framework", "Package", "Sources", "Sinks", "Summaries"])
94-
95-
os.chdir(working_dir)
96-
97-
utils.subprocess_run(["git", "checkout", "main"])
98-
99-
current_sha = Git.get_output(["git", "rev-parse", "HEAD"])
100-
current_date = Git.get_date(current_sha)
10180

102-
# Read the additional framework data, such as URL, friendly name from the latest commit
103-
input_framework_csv = settings.documentation_folder_no_prefix + "frameworks.csv"
104-
frameworks = fr.FrameworkCollection(
105-
input_framework_csv.format(language=config.lang))
81+
languages_to_process = set()
82+
language_utils = {}
10683

107-
while True:
108-
print("Getting stats for " + current_sha)
109-
utils.subprocess_run(["git", "checkout", current_sha])
84+
# Try to create output files for each language:
85+
for lang in settings.languages:
86+
try:
87+
file_total = open(output_prefix + lang + ".csv", 'w', newline='')
88+
file_packages = open(output_prefix + lang +
89+
"-packages.csv", 'w', newline='')
90+
csvwriter_total = csv.writer(file_total)
91+
csvwriter_packages = csv.writer(file_packages)
92+
except:
93+
print(
94+
f"Unexpected error while opening files for {lang}:", sys.exc_info()[0])
95+
if file_total is not None:
96+
file_total.close()
97+
if file_packages is not None:
98+
file_packages.close()
99+
else:
100+
languages_to_process.add(lang)
101+
language_utils[lang] = {
102+
"file_total": file_total,
103+
"file_packages": file_packages,
104+
"csvwriter_total": csvwriter_total,
105+
"csvwriter_packages": csvwriter_packages
106+
}
107+
108+
try:
109+
# Write headers
110+
for lang in languages_to_process:
111+
csvwriter_total = language_utils[lang]["csvwriter_total"]
112+
csvwriter_packages = language_utils[lang]["csvwriter_packages"]
113+
csvwriter_total.writerow(
114+
["SHA", "Date", "Sources", "Sinks", "Summaries"])
115+
csvwriter_packages.writerow(
116+
["SHA", "Date", "Framework", "Package", "Sources", "Sinks", "Summaries"])
117+
118+
os.chdir(working_dir)
110119

111-
try:
112-
packages = get_packages(config.lang, config.ql_path)
120+
utils.subprocess_run(["git", "checkout", "main"])
113121

114-
csvwriter_total.writerow([
115-
current_sha,
116-
current_date,
117-
packages.get_part_count("source"),
118-
packages.get_part_count("sink"),
119-
packages.get_part_count("summary")])
122+
current_sha = Git.get_output(["git", "rev-parse", "HEAD"])
123+
current_date = Git.get_date(current_sha)
120124

121-
matched_packages = set()
125+
# Read the additional framework data, such as URL, friendly name from the latest commit
126+
for lang in languages_to_process:
127+
input_framework_csv = settings.documentation_folder_no_prefix + "frameworks.csv"
128+
language_utils[lang]["frameworks"] = fr.FrameworkCollection(
129+
input_framework_csv.format(language=lang))
130+
language_utils[lang]["config"] = [
131+
c for c in configs if c.lang == lang][0]
122132

123-
for framework in frameworks.get_frameworks():
124-
framework: fr.Framework = framework
133+
while True:
134+
utils.subprocess_run(["git", "checkout", current_sha])
135+
for lang in languages_to_process.copy():
136+
try:
137+
print(
138+
f"Getting stats for {lang} at {current_sha} on {current_date.isoformat()}")
125139

126-
row = [current_sha, current_date,
127-
framework.name, framework.package_pattern]
140+
config: utils.LanguageConfig = language_utils[lang]["config"]
141+
frameworks: fr.FrameworkCollection = language_utils[lang]["frameworks"]
142+
csvwriter_total = language_utils[lang]["csvwriter_total"]
143+
csvwriter_packages = language_utils[lang]["csvwriter_packages"]
128144

129-
sources = 0
130-
sinks = 0
131-
summaries = 0
145+
packages = get_packages(lang, config.ql_path)
132146

133-
for package in packages.get_packages():
134-
if frameworks.get_package_filter(framework)(package):
135-
sources += package.get_part_count("source")
136-
sinks += package.get_part_count("sink")
137-
summaries += package.get_part_count("summary")
138-
matched_packages.add(package.name)
147+
csvwriter_total.writerow([
148+
current_sha,
149+
current_date,
150+
packages.get_part_count("source"),
151+
packages.get_part_count("sink"),
152+
packages.get_part_count("summary")])
139153

140-
row.append(sources)
141-
row.append(sinks)
142-
row.append(summaries)
154+
matched_packages = set()
143155

144-
csvwriter_packages.writerow(row)
156+
# Getting stats for frameworks:
157+
for framework in frameworks.get_frameworks():
158+
framework: fr.Framework = framework
145159

146-
row = [current_sha, current_date, "Others"]
160+
row = [current_sha, current_date,
161+
framework.name, framework.package_pattern]
147162

148163
sources = 0
149164
sinks = 0
150165
summaries = 0
151-
other_packages = set()
152166

153167
for package in packages.get_packages():
154-
if not package.name in matched_packages:
168+
if frameworks.get_package_filter(framework)(package):
155169
sources += package.get_part_count("source")
156170
sinks += package.get_part_count("sink")
157171
summaries += package.get_part_count("summary")
158-
other_packages.add(package.name)
172+
matched_packages.add(package.name)
159173

160-
row.append(", ".join(sorted(other_packages)))
161174
row.append(sources)
162175
row.append(sinks)
163176
row.append(summaries)
164177

165178
csvwriter_packages.writerow(row)
166179

167-
print("Collected stats for " + current_sha +
168-
" at " + current_date.isoformat())
169-
except:
170-
print("Error getting stats for " +
171-
current_sha + ". Stopping iteration.")
172-
break
180+
# Getting stats for packages not included in frameworks:
181+
row = [current_sha, current_date, "Others"]
173182

174-
current_sha, current_date = Git.get_previous_sha(
175-
current_sha, current_date)
183+
sources = 0
184+
sinks = 0
185+
summaries = 0
186+
other_packages = set()
176187

188+
for package in packages.get_packages():
189+
if not package.name in matched_packages:
190+
sources += package.get_part_count("source")
191+
sinks += package.get_part_count("sink")
192+
summaries += package.get_part_count("summary")
193+
other_packages.add(package.name)
194+
195+
row.append(", ".join(sorted(other_packages)))
196+
row.append(sources)
197+
row.append(sinks)
198+
row.append(summaries)
199+
200+
csvwriter_packages.writerow(row)
201+
202+
print(
203+
f"Collected stats for {lang} at {current_sha} on {current_date.isoformat()}")
204+
205+
except:
206+
print(
207+
f"Error getting stats for {lang} at {current_sha}. Stopping iteration for language.")
208+
languages_to_process.remove(lang)
209+
if len(languages_to_process) == 0:
210+
break
211+
212+
current_sha, current_date = Git.get_previous_sha(
213+
current_sha, current_date)
214+
215+
finally:
177216
utils.subprocess_run(["git", "checkout", "main"])
217+
218+
# Close files:
219+
for lang in settings.languages:
220+
file_total = language_utils[lang]["file_total"]
221+
file_packages = language_utils[lang]["file_packages"]
222+
if file_total is not None:
223+
file_total.close()
224+
if file_packages is not None:
225+
file_packages.close()

0 commit comments

Comments
 (0)