Skip to content

Commit 944b8b3

Browse files
analysis: add functionality for outlier determination
1 parent fe8234c commit 944b8b3

File tree

4 files changed

+208
-0
lines changed

4 files changed

+208
-0
lines changed

src/main/python/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ tqdm = "*"
2020
run-eval = "result_analysis.__main__:main"
2121
power-simulation = "result_analysis.simulation:main"
2222
find-example = "result_analysis.__main__:example"
23+
find-outliers = "result_analysis.__main__:outliers"
2324

2425
[build-system]
2526
requires = ["poetry-core>=1.0.0"]

src/main/python/result_analysis/__main__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
find_example,
33
metrics_table_generation,
44
)
5+
from result_analysis.analyze_results import find_outliers
56

67
results_dir = "../../../evaluation-workdir/results/"
78
repo_sample = "../../../evaluation-workdir/data/repo-sample.yaml"
@@ -23,5 +24,9 @@ def example():
2324
find_example(results_dir + "rep-1/", repo_sample, False)
2425

2526

27+
def outliers():
    """Console entry point that prints the outlier results of ``rep-1/``.

    Runs ``find_outliers`` on the ``rep-1/`` sub-directory of ``results_dir``
    using the repository sample in ``repo_sample``; trivial results are not
    filtered out.
    """
    rep_dir = f"{results_dir}rep-1/"
    find_outliers(
        path_to_results=rep_dir,
        path_to_repo_list=repo_sample,
        only_non_trivial=False,
    )
29+
30+
2631
if __name__ == "__main__":
2732
main()
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
from result_analysis.io import load_repositories
2+
from result_analysis.io import load_all_results
3+
from result_analysis.tables import list_all_dirs
4+
from result_analysis.result_handling import accumulate_data_per_patcher
5+
from result_analysis.eval_setup import Metric, Patcher
6+
from result_analysis.result_handling import (
7+
non_trivial_results,
8+
results_per_repo,
9+
)
10+
11+
# Pairs of (language name, LaTeX column header). Some headers are wrapped in
# a single-column ``\multicolumn`` cell, and the longer names are abbreviated
# ("JavaS.", "TypeS.").
# NOTE(review): presumably consumed by table-generation code elsewhere --
# confirm before changing the order or the header markup.
languages = [
    ("Python", "Python"),
    ("JavaScript", "\\multicolumn{1}{c}{JavaS.}"),
    ("Go", "Go"),
    ("C++", "\\multicolumn{1}{c}{C++}"),
    ("Java", "Java"),
    ("TypeScript", "\\multicolumn{1}{c}{TypeS.}"),
    ("C", "C"),
    ("C#", "C#"),
    ("PHP", "PHP"),
    ("Rust", "Rust"),
]
23+
24+
25+
def find_outliers(path_to_results, path_to_repo_list, only_non_trivial, top_n=5):
    """Print the MPatch results with the highest number of incorrect outcomes.

    The MPatch results are loaded, optionally reduced to the non-trivial
    ones, and grouped per repository. From every repository the ``top_n``
    results with the largest ``outcome_classification.num_incorrect()`` are
    collected; the ``top_n`` worst of that combined pool are then printed,
    each followed by an empty line.

    Args:
        path_to_results: Location handed to ``load_all_results``.
        path_to_repo_list: Location handed to ``load_repositories``.
        only_non_trivial: When true, results are filtered through
            ``non_trivial_results`` before grouping.
        top_n: How many results to keep per repository and to print overall.
            Defaults to 5, the limit that was previously hard-coded.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    def num_incorrect(result):
        return result.outcome_classification.num_incorrect()

    def worst_first(candidates):
        # Ascending sort followed by a reversal (instead of reverse=True)
        # keeps the relative order of ties exactly as it was before, so the
        # same results are selected when several share a count.
        return sorted(candidates, key=num_incorrect)[::-1][:top_n]

    repos = load_repositories(path_to_repo_list)

    # Only the MPatch results are inspected, so the other patchers' results
    # are no longer loaded, filtered and grouped just to be thrown away.
    results = load_all_results(path_to_results, Patcher.MPatch)
    if only_non_trivial:
        results = non_trivial_results(results)
    results = results_per_repo(results, repos)

    print("found " + str(len(results)) + " repo results")
    print(str(len(results.keys())))

    # Gather each repository's worst results into one pool of candidates.
    worst_results = []
    for repo in results.keys():
        worst_results.extend(worst_first(results[repo]))

    # Rank the pool once more and report the overall worst results.
    for result in worst_first(worst_results):
        print(result)
        print()
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
- total_number_of_branches: '360'
2+
language: Java
3+
repo_name: elastic/elasticsearch
4+
total_number_of_commits: '141531'
5+
total_number_of_results: '5'
6+
total_number_of_committers: '652'
7+
- - search_method: MessageScan
8+
cherry_and_target:
9+
cherry:
10+
id: da511f4e033db6e8a6aa2a54b23e906b5e026845
11+
parent_ids:
12+
- 5f51b08b7a4473f08c1245c629b6a8dbc3594db0
13+
message: "SQL: Implement FORMAT function (#55454)\n\nImplement FORMAT according to the SQL Server spec: https://docs.microsoft.com/en-us/sql/t-sql/functions/format-transact-sql?view=sql-server-ver15#ExampleD by translating to the java.time patterns used in DATETIME_FORMAT.\r\n\r\nCloses: #54965\r\n\r\nCo-authored-by: Marios Trivyzas <[email protected]>\r\nCo-authored-by: Bogdan Pintea <[email protected]>\r\nCo-authored-by: Andrei Stefan <[email protected]>"
14+
author: Binu R J <[email protected]>
15+
committer: GitHub <[email protected]>
16+
time: 'Time { raw: git_time { time: 1600695131, offset: 120, sign: 43 } }'
17+
target:
18+
id: 1f612cccbb1be786989db99369d99b846e29d9ac
19+
parent_ids:
20+
- cadd5dc53f8d58f03aaaa1e343e2466b4d53a567
21+
message: "SQL: Implement FORMAT function (#55454) (#62701)\n\nImplement FORMAT according to the SQL Server spec: https://docs.microsoft.com/en-us/sql/t-sql/functions/format-transact-sql?view=sql-server-ver15#ExampleD by translating to the java.time patterns used in DATETIME_FORMAT.\r\n\r\nCloses: #54965\r\n\r\nCo-authored-by: Marios Trivyzas <[email protected]>\r\nCo-authored-by: Bogdan Pintea <[email protected]>\r\nCo-authored-by: Andrei Stefan <[email protected]>\r\n(cherry picked from commit da511f4e033db6e8a6aa2a54b23e906b5e026845)\r\n"
22+
author: Marios Trivyzas <[email protected]>
23+
committer: GitHub <[email protected]>
24+
time: 'Time { raw: git_time { time: 1600708924, offset: 120, sign: 43 } }'
25+
is_trivial: false
26+
- search_method: MessageScan
27+
cherry_and_target:
28+
cherry:
29+
id: 8f7afbdeb9295999b48a6c36db5b31cbe0cee432
30+
parent_ids:
31+
- 1a099fa07531d96efd9f771ff9dc8b6775fd35fa
32+
message: "SQL: Fix issue with timezone when paginating (#52101)\n\nPreviously, when the specified (or default) fetchSize led to\r\nsubsequent HTTP requests and the usage of cursors, those subsequent\r\nwere no longer using the client timezone specified in the initial\r\nSQL query. As a consequence, Even though the query is executed once\r\n(with the correct timezone) the processing of the query results by\r\nthe HitExtractors in the next pages was done using the default\r\ntimezone Z. This could lead to incorrect results.\r\n\r\nFix the issue by correctly using the initially specified timezone,\r\nwhich is found in the deserialisation of the cursor string.\r\n\r\nFixes: #51258"
33+
author: Marios Trivyzas <[email protected]>
34+
committer: GitHub <[email protected]>
35+
time: 'Time { raw: git_time { time: 1581429546, offset: 60, sign: 43 } }'
36+
target:
37+
id: 4797347ae8f3d2024452d150e2b84da53b990de5
38+
parent_ids:
39+
- 3976770cccdc693438d39b56f535ed96e3d7d09b
40+
message: |
41+
SQL: [Tests] Add integ tests for timezone and pagination (#52101)
42+
43+
Cherry pick and adapt tests to validate correct behaviour
44+
regarding processing of results that involve the use of the client
45+
configured timezone by the HitExtractors when paginating over
46+
the results of the query (use of cursors).
47+
48+
(cherry picked from commit 8f7afbdeb9295999b48a6c36db5b31cbe0cee432)
49+
author: Marios Trivyzas <[email protected]>
50+
committer: Marios Trivyzas <[email protected]>
51+
time: 'Time { raw: git_time { time: 1581436246, offset: 60, sign: 43 } }'
52+
is_trivial: false
53+
- search_method: MessageScan
54+
cherry_and_target:
55+
cherry:
56+
id: 12072b200efedfbc894bd33d781a392b26789071
57+
parent_ids:
58+
- e753e12f613d6abf5363d029b0dfda9aa5547721
59+
message: "Run code snippets in ReindexDocumentationIT (#35165)\n\n\r\nCloses #32093"
60+
author: Vladimir Dolzhenko <[email protected]>
61+
committer: GitHub <[email protected]>
62+
time: 'Time { raw: git_time { time: 1541163135, offset: 60, sign: 43 } }'
63+
target:
64+
id: 16c7bcc58e4ab534ae9876035ec72608735f6574
65+
parent_ids:
66+
- d91dc731004cd8241ddf333b089b21e29a645f43
67+
message: |
68+
Run code snippets in ReindexDocumentationIT (#35165)
69+
70+
Closes #32093
71+
72+
(cherry picked from commit 12072b200efedfbc894bd33d781a392b26789071)
73+
author: Vladimir Dolzhenko <[email protected]>
74+
committer: Vladimir Dolzhenko <[email protected]>
75+
time: 'Time { raw: git_time { time: 1541164166, offset: 60, sign: 43 } }'
76+
is_trivial: false
77+
- search_method: MessageScan
78+
cherry_and_target:
79+
cherry:
80+
id: 63fa4184ec1965c3223ce15dd618c3c508de1162
81+
parent_ids:
82+
- 710c60d4d73cde477da2c22d92fe5b55e97608a5
83+
message: |-
84+
Add 8.5.2 release notes (#91797)
85+
86+
Add 8.5.2 release notes
87+
author: Bogdan Pintea <[email protected]>
88+
committer: GitHub <[email protected]>
89+
time: 'Time { raw: git_time { time: 1669138368, offset: 60, sign: 43 } }'
90+
target:
91+
id: 2cf3276fa43c31193a13aa6fa7006fad207d51a6
92+
parent_ids:
93+
- 4e0a18fc8ee8b1931a84da74dfdf7eff217df2ba
94+
message: |-
95+
Add 8.5.2 release notes (#91797) (#91824)
96+
97+
Add 8.5.2 release notes
98+
99+
(cherry picked from commit 63fa4184ec1965c3223ce15dd618c3c508de1162)
100+
author: Bogdan Pintea <[email protected]>
101+
committer: GitHub <[email protected]>
102+
time: 'Time { raw: git_time { time: 1669142152, offset: -300, sign: 45 } }'
103+
is_trivial: false
104+
- search_method: MessageScan
105+
cherry_and_target:
106+
cherry:
107+
id: 9f1875ee2cb72b7e70bcd3fca21c36815d826f6d
108+
parent_ids:
109+
- a419993068f0a5692da37de4884c0e7bdc7fd315
110+
message: |-
111+
[DOCS] Fix docs for user profiles (#102452)
112+
113+
* [DOCS] Fix docs for user profiles
114+
author: Fabio Busatto <[email protected]>
115+
committer: GitHub <[email protected]>
116+
time: 'Time { raw: git_time { time: 1700732297, offset: 60, sign: 43 } }'
117+
target:
118+
id: 5459965e613ab84edc4e6e4436cd56a8a1ff07d3
119+
parent_ids:
120+
- e226621c79f8d792e63dbbf3949d36abaebde43d
121+
message: |-
122+
[DOCS] Fix docs for user profiles (#102452) (#102521)
123+
124+
* [DOCS] Fix docs for user profiles
125+
126+
(cherry picked from commit 9f1875ee2cb72b7e70bcd3fca21c36815d826f6d)
127+
128+
Co-authored-by: Fabio Busatto <[email protected]>
129+
author: Abdon Pijpelink <[email protected]>
130+
committer: GitHub <[email protected]>
131+
time: 'Time { raw: git_time { time: 1700739305, offset: -300, sign: 45 } }'
132+
is_trivial: false

0 commit comments

Comments
 (0)