Skip to content

Commit e5e0da0

Browse files
authored
Merge pull request #2549 from sherlock-project/add/instapaper
feat: add instapaper
2 parents 9f5b7e1 + dc61cdc commit e5e0da0

File tree

4 files changed

+126
-8
lines changed

4 files changed

+126
-8
lines changed

.github/workflows/validate_modified_targets.yml

Lines changed: 43 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
- name: Checkout repository
1515
uses: actions/checkout@v5
1616
with:
17+
ref: ${{ github.event.pull_request.head.sha }}
1718
fetch-depth: 0
1819

1920
- name: Set up Python
@@ -38,13 +39,21 @@ jobs:
3839
3940
# Discover changes
4041
git show origin/${{ github.base_ref }}:sherlock_project/resources/data.json > data.json.base
42+
cp sherlock_project/resources/data.json data.json.head
43+
4144
CHANGED=$(
42-
jq -r --slurpfile base data.json.base --slurpfile head sherlock_project/resources/data.json '
43-
[
44-
($head[0] | keys_unsorted[]) as $key
45-
| select(($base[0][$key] != $head[0][$key]) or ($base[0][$key] | not))
46-
| $key
47-
] | unique | join(",")'
45+
python - <<'EOF'
46+
import json
47+
with open("data.json.base") as f: base = json.load(f)
48+
with open("data.json.head") as f: head = json.load(f)
49+
50+
changed = []
51+
for k, v in head.items():
52+
if k not in base or base[k] != v:
53+
changed.append(k)
54+
55+
print(",".join(sorted(changed)))
56+
EOF
4857
)
4958
5059
# Preserve changelist
@@ -53,12 +62,38 @@ jobs:
5362
5463
- name: Validate modified targets
5564
if: steps.discover-modified.outputs.changed_targets != ''
65+
continue-on-error: true
66+
run: |
67+
$(poetry env activate)
68+
pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}" --junitxml=validation_results.xml
69+
deactivate
70+
71+
- name: Prepare validation summary
72+
if: steps.discover-modified.outputs.changed_targets != ''
73+
id: prepare-summary
5674
run: |
5775
$(poetry env activate)
58-
pytest -q --tb no -rA -m validate_targets -n 20 --chunked-sites "${{ steps.discover-modified.outputs.changed_targets }}"
76+
summary=$(
77+
python devel/summarize_site_validation.py validation_results.xml || echo "Failed to generate summary of test results"
78+
)
5979
deactivate
80+
echo "$summary" > validation_summary.md
81+
82+
- name: Announce validation results
83+
if: steps.discover-modified.outputs.changed_targets != ''
84+
uses: actions/github-script@v8
85+
with:
86+
script: |
87+
const fs = require('fs');
88+
const body = fs.readFileSync('validation_summary.md', 'utf8');
89+
github.rest.issues.createComment({
90+
issue_number: context.payload.pull_request.number,
91+
owner: context.repo.owner,
92+
repo: context.repo.repo,
93+
body: body,
94+
});
6095
61-
- name: Announce skip if no modified targets
96+
- name: This step shows as ran when no modifications are found
6297
if: steps.discover-modified.outputs.changed_targets == ''
6398
run: |
6499
echo "No modified targets found"

devel/summarize_site_validation.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#!/usr/bin/env python
2+
# This module summarizes the results of site validation tests queued by
3+
# workflow validate_modified_targets for presentation in Issue comments.
4+
5+
from defusedxml import ElementTree as ET
6+
import sys
7+
from pathlib import Path
8+
9+
def summarize_junit_xml(xml_path: Path) -> str:
    """Render a JUnit XML report of site validation tests as Markdown.

    The report is expected to contain test cases named
    ``test_false_pos[<site>]`` and ``test_false_neg[<site>]``. Each site
    becomes one row of a Markdown table with one column per check.

    Args:
        xml_path: Path to the JUnit XML file to summarize.

    Returns:
        The Markdown summary: a heading, the results table, and optional
        trailing notices when failures or errors were recorded.

    Raises:
        ValueError: If the document contains no ``testsuite`` element.
    """
    tree = ET.parse(xml_path)
    root = tree.getroot()
    # Reports may either wrap the suite in <testsuites> or use <testsuite>
    # as the document root; Element.find() only searches children, so the
    # root itself has to be checked explicitly.
    suite = root if root.tag == 'testsuite' else root.find('testsuite')

    if suite is None:
        raise ValueError("Invalid JUnit XML: No testsuite found")

    pass_message: str = ":heavy_check_mark: &nbsp; Pass"
    fail_message: str = ":x: &nbsp; Fail"

    # Maps the test function name to the table column it fills in.
    column_by_test: dict[str, str] = {
        "test_false_pos": "F+ Check",
        "test_false_neg": "F- Check",
    }

    summary_lines: list[str] = [
        "#### Automatic validation of changes\n",
        "| Target | F+ Check | F- Check |",
        "|---|---|---|",
    ]

    failures = int(suite.get('failures', 0))
    errors_detected: bool = False

    # site name -> {column title -> rendered pass/fail cell}
    results: dict[str, dict[str, str]] = {}

    for testcase in suite.findall('testcase'):
        failed = testcase.find('failure') is not None
        errored = testcase.find('error') is not None

        # Record errors before parsing the name so that an error on a test
        # case without a site id is still reported in the summary.
        if errored:
            errors_detected = True

        # Split on the FIRST '[' only, so site ids that themselves contain
        # brackets are kept intact.
        test_name, bracket, site_name = (testcase.get('name') or '').partition('[')
        if not bracket:
            # Not parametrized by site: there is no table row to attribute
            # this result to.
            continue
        if site_name.endswith(']'):
            # Drop only the closing bracket of the parameter id.
            site_name = site_name[:-1]

        row = results.setdefault(site_name, {})
        column = column_by_test.get(test_name)
        if column is not None:
            row[column] = fail_message if failed or errored else pass_message

    for site_name, row in results.items():
        # A missing cell means the corresponding check never reported.
        summary_lines.append(
            f"| {site_name} | {row.get('F+ Check', 'Error!')} | {row.get('F- Check', 'Error!')} |"
        )

    if failures > 0:
        summary_lines.append("\n___\n" +
            "\nFailures were detected on at least one updated target. Commits containing accuracy failures" +
            " will often not be merged (unless a rationale is provided, such as false negatives due to regional differences).")

    if errors_detected:
        summary_lines.append("\n___\n" +
            "\n**Errors were detected during validation. Please review the workflow logs.**")

    return "\n".join(summary_lines)
60+
61+
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the path to a
    # JUnit XML report, and prints its Markdown summary to stdout.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: summarize_site_validation.py <junit-xml-file>")
        sys.exit(1)

    report_path: Path = Path(cli_args[0])
    if not report_path.is_file():
        print(f"Error: File '{report_path}' does not exist.")
        sys.exit(1)

    print(summarize_junit_xml(report_path))

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,5 +60,9 @@ rstr = "^3.2.2"
6060
pytest = "^8.4.2"
6161
pytest-xdist = "^3.8.0"
6262

63+
64+
[tool.poetry.group.ci.dependencies]
65+
defusedxml = "^0.7.1"
66+
6367
[tool.poetry.scripts]
6468
sherlock = 'sherlock_project.sherlock:main'

sherlock_project/resources/data.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,6 +1142,13 @@
11421142
"urlProbe": "https://imginn.com/{}",
11431143
"username_claimed": "instagram"
11441144
},
1145+
"Instapaper": {
1146+
"errorType": "status_code",
1147+
"request_method": "GET",
1148+
"url": "https://www.instapaper.com/p/{}",
1149+
"urlMain": "https://www.instapaper.com/",
1150+
"username_claimed": "john"
1151+
},
11451152
"Instructables": {
11461153
"errorType": "status_code",
11471154
"url": "https://www.instructables.com/member/{}",

0 commit comments

Comments
 (0)