|
12 | 12 | "url": "https://github.com/publiccodenet/publiccode.net.git", |
13 | 13 | "branch": "main" |
14 | 14 | }, |
| 15 | + "about.publiccode.net": { |
| 16 | + "url": "https://github.com/publiccodenet/about.git", |
| 17 | + "branch": "main" |
| 18 | + }, |
| 19 | + "blog.publiccode.net": { |
| 20 | + "url": "https://github.com/publiccodenet/blog.git", |
| 21 | + "branch": "main" |
| 22 | + }, |
15 | 23 | "community-implementation-guide-standard": { |
16 | 24 | "url": "https://github.com/publiccodenet/community-implementation-guide-standard.git", |
17 | 25 | "branch": "main" |
|
22 | 30 | } |
23 | 31 | }, |
24 | 32 | "ignore_patterns" : { |
| 33 | + "^http[s]\\?://archive\\.org/web/": "often times out", |
25 | 34 | "^http[s]\\?://twitter\\.com": "302; does not serve scripts", |
| 35 | + "^http[s]\\?://linkedin\\.com": "302; does not serve scripts", |
| 36 | + "^http[s]\\?://www\\.linkedin\\.com": "999; does not serve scripts", |
| 37 | + "^http[s]\\?://chat\\.openai\\.com": "302; does not serve scripts", |
| 38 | + "^https://github\\.com/org_name/codebase_name\\.git": "bogus example URL", |
26 | 39 | "^http[s]\\?://github\\.com/.*/edit/": "may point to yet-to-exist page", |
27 | 40 | "^http[s]\\?://docs\\.github\\.com/": "seems blocked as DoS protection", |
| 41 | + "^http[s]\\?://github\\.com/[0-9A-Za-z_\\-\\./]\\+/\\(issues\\|pull\\)/[0-9]\\+[\\.,)]*": "ignore github issues and PRs", |
28 | 42 | "plausible\\.io/js/plausible\\.js": "does not serve to scripts", |
29 | 43 | "opensource\\.org": "failed: 503 No error", |
30 | 44 | "belastingdienst\\.nl/wps/wcm/connect/bldcontenten": "regular timeouts", |
31 | 45 | "reclameland\\.nl/drukken/softcover-boeken": "failed: 403 No error", |
| 46 | + "^https://help\\.miro\\.com": "403 to script", |
32 | 47 | "www\\.dta\\.gov\\.au/help-and-advice": "failed: 403 No error", |
33 | 48 | "^https://pixabay\\.com/": "gives 403 to curl", |
34 | 49 | "^https://fonts.google.com/download?family=": "bash param in the URL", |
35 | 50 | "https://standard.publiccode.net/criteria/\\\\2.html": "regex in URL", |
36 | 51 | "^https://www.go-fair.org/": "gives 400s when run as GitHub workflow", |
| 52 | + "^https://support\\.google\\.com/": "gives 404 to curl", |
| 53 | + "^https://www\\.komoot\\.com/": "gives 404 to curl, works in browser", |
37 | 54 | "^https://www\\.grammarly\\.com/": "HTTP/2 405, allow: POST, GET", |
| 55 | + "^https://giphy\\.com": "gives 503 to curl", |
38 | 56 | "^https://www\\.lonebeard\\.com": "defunct, referenced in binary files", |
39 | 57 | "^http[s]\\?://cipa\\.jp/exif": "defunct, embedded in some .jpg files", |
40 | 58 | "^http://ns\\.adobe\\.com/": "defunct, embedded in .jpg", |
41 | | - "^http://www.gimp.org/xmp/": "defunct, embedded in .jpg", |
42 | | - "amsterdam\\.nl/en/": "frequent timeouts" |
| 59 | + "^http://www\\.gimp\\.org/xmp/": "defunct, embedded in .jpg", |
| 60 | + "^http://www\\.inkscape\\.org/namespaces/inkscape": "defunct, in .svg", |
| 61 | + "^http[s]\\?://sodipodi\\.sourceforge\\.net/DTD/sodipodi-0\\.dtd": "defunct, in SVGs", |
| 62 | + "^http[s]\\?://www\\.omg\\.org/spec/.*/20100524": "defunct, embedded in old .bpmn files", |
| 63 | + "^http[s]\\?://bpmn\\.io/schema/bpmn": "unreliable", |
| 64 | + "^http[s]\\?://www\\.un\\.org/en/content/": "frequent timeout", |
| 65 | + "^http[s]\\?://arkitektur\\.digst\\.dk/node/1173": "times out", |
| 66 | + "^http[s]\\?://eur-lex\\.europa\\.eu/legal-content/EN/TXT": "timeouts", |
| 67 | + "^https://www\\.uwv\\.nl": "gives 404 to curl", |
| 68 | + "listennotes\\.com/": "frequent timeouts", |
| 69 | + "lists\\.publiccode\\.net/mailman/": "frequent timeouts", |
| 70 | + "https://wetten\\.overheid\\.nl/BWBR0025279/2013-01-01": "times out", |
| 71 | + "amsterdam\\.nl/en/": "frequent timeouts", |
| 72 | + "^http[s]\\?://www\\.figma\\.com": "gives 404 to curl" |
| 73 | + }, |
| 74 | + "transforms" : { |
| 75 | + "sed 's@/[\\.,)]*$@/@'": |
| 76 | + "remove trailing punctuation from links ending in '/'", |
| 77 | + "sed 's@\\.net[\\.,)]*$@.net@'": |
| 78 | + "remove trailing punctuation from links ending in '.net'", |
| 79 | + "sed 's@\\.com[\\.,)]*$@.com@'": |
| 80 | + "remove trailing punctuation from links ending in '.com'", |
| 81 | + "sed 's@^\\(http.*\\.html\\)[\\.,)]*$@\\1@'": |
| 82 | + "remove trailing punctuation from links ending in '.html'", |
| 83 | + "sed 's@^\\(http.*\\.pdf\\)[\\.,)]*$@\\1@'": |
| 84 | + "remove trailing punctuation from links ending in '.pdf'", |
| 85 | + "sed 's@Open_air_school).$@Open_air_school@'": |
| 86 | + "remove trailing punctuation", |
| 87 | + "sed 's@\\(nextcloud/index.php/s/[0-9a-zA-Z]*\\)[\\.,)]*$@\\1@'": |
| 88 | + "remove trailing punctuation from nextcloud files", |
| 89 | + "sed 's@poortwachter[\\.,)]*$@poortwachter@'": |
| 90 | + "remove trailing punctuation", |
| 91 | + "sed 's@\\(publiccode\\.net/careers/marketing\\)[\\.),:]*@\\1@'": |
| 92 | + "remove trailing punctuation" |
43 | 93 | } |
44 | 94 | } |
0 commit comments