Skip to content

Commit cb07050

Browse files
author
markzegarelli
authored
Link Check Update (#2317)
* init * internal and external checks in place * updates * link fixes * catalog update, hide twilio event source beta * catalog update * fix internal links * some external fixes * update workflow * permissions * update command * update command * external links * fix file output, enable cron
1 parent ba94afb commit cb07050

File tree

45 files changed

+1983
-964
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1983
-964
lines changed
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
name: CheckLinks
2+
3+
# **What it does**: Runs a weekly check for broken external links.
4+
# **Why we have it**: We want to make sure that pages we link to work.
5+
# **Who does it impact**: Everyone
6+
7+
8+
9+
on:
10+
schedule:
11+
- cron: "5 4 * * SUN"
12+
13+
jobs:
14+
checklinks-external:
15+
if: github.repository == 'segmentio/segment-docs'
16+
name: Linux
17+
runs-on: ubuntu-latest
18+
env:
19+
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_REPO_SCOPE }}
20+
REPORT_AUTHOR: docsbot
21+
REPORT_LABEL: report, automated issue, broken links
22+
REPORT_REPOSITORY: segmentio/segment-docs
23+
strategy:
24+
fail-fast: false
25+
steps:
26+
- uses: actions/checkout@v2
27+
- name: Ruby
28+
uses: ruby/setup-ruby@v1
29+
with:
30+
ruby-version: 2.6 # Not needed with a .ruby-version file
31+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
32+
- name: Yarn install
33+
run: yarn install
34+
- name: Build jekyll website with
35+
run: bundle exec jekyll build
36+
- name: Run Script
37+
run: |
38+
sudo make linkcheck-external > broken_links.md
39+
- if: ${{ failure() }}
40+
name: Create issue from file
41+
id: broken-link-report
42+
uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
43+
with:
44+
token: ${{ env.GITHUB_TOKEN }}
45+
46+
title: Broken Links Report - External
47+
content-filepath: ./broken_links.md
48+
repository: ${{ env.REPORT_REPOSITORY }}
49+
labels: ${{ env.REPORT_LABEL }}

.github/workflows/check-links.yml renamed to .github/workflows/check-links-internal.yml

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,15 @@ on:
1111
- cron: "5 4 * * SAT"
1212

1313
jobs:
14-
checklinks:
14+
checklinks-internal:
1515
if: github.repository == 'segmentio/segment-docs'
1616
name: Linux
1717
runs-on: ubuntu-latest
18+
env:
19+
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_REPO_SCOPE }}
20+
REPORT_AUTHOR: docsbot
21+
REPORT_LABEL: report, automated issue, broken links
22+
REPORT_REPOSITORY: segmentio/segment-docs
1823
strategy:
1924
fail-fast: false
2025
steps:
@@ -28,18 +33,17 @@ jobs:
2833
run: yarn install
2934
- name: Build jekyll website with
3035
run: bundle exec jekyll build
31-
- name: Link Checker
32-
uses: lycheeverse/[email protected]
36+
- name: Run Script
37+
run: |
38+
sudo make linkcheck-internal > broken_links.md
39+
- if: ${{ failure() }}
40+
name: Create issue from file
41+
id: broken-link-report
42+
uses: peter-evans/create-issue-from-file@b4f9ee0a9d4abbfc6986601d9b1a4f8f8e74c77e
3343
with:
34-
args: --no-progress _site/**/*.html -a 429 -c ./lychee.toml
35-
env:
36-
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
37-
# - name: "exclude non-400 errors"
38-
# run: |
39-
# sudo grep -v "error sending request" ./lychee/out.md > ./lychee/out2.md
40-
- name: Create Issue From File
41-
uses: peter-evans/create-issue-from-file@v3
42-
with:
43-
title: Link Checker Report
44-
content-filepath: ./lychee/out.md
45-
labels: report, automated issue
44+
token: ${{ env.GITHUB_TOKEN }}
45+
46+
title: Broken Links Report - Local
47+
content-filepath: ./broken_links.md
48+
repository: ${{ env.REPORT_REPOSITORY }}
49+
labels: ${{ env.REPORT_LABEL }}

Makefile

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,19 @@ intialize-work-dir:
2929

3030
.PHONY: build
3131
build: node_modules vendor/bundle
32-
@echo "Jekyll env: ${JEKYLL_ENV}"
33-
@chown -R jekyll /workdir
34-
@chmod -R 777 /workdir
35-
@echo "env: ${JEKYLL_ENV}"
36-
@$(BIN)/webpack --mode=production
37-
@JEKYLL_ENV=${JEKYLL_ENV} bundle exec jekyll build --trace
38-
@if [ '${BUILDKITE_BRANCH}' == 'staging' ]; then echo "updating sitemap.xml..." && sed -i -r 's/segment.com/segment.build/g' ./_site/sitemap.xml; fi;
32+
@$(BIN)/concurrently --raw --kill-others -n webpack,jekyll \
33+
"$(BIN)/webpack --mode=development --watch" \
34+
"bundle exec jekyll clean && bundle exec jekyll build -V"
35+
36+
# .PHONY: build
37+
# build: node_modules vendor/bundle
38+
# @echo "Jekyll env: ${JEKYLL_ENV}"
39+
# @chown -R jekyll /workdir
40+
# @chmod -R 777 /workdir
41+
# @echo "env: ${JEKYLL_ENV}"
42+
# @$(BIN)/webpack --mode=production
43+
# @JEKYLL_ENV=${JEKYLL_ENV} bundle exec jekyll build --trace
44+
# @if [ '${BUILDKITE_BRANCH}' == 'staging' ]; then echo "updating sitemap.xml..." && sed -i -r 's/segment.com/segment.build/g' ./_site/sitemap.xml; fi;
3945

4046
.PHONY: upload-docs
4147
upload-docs:
@@ -82,6 +88,16 @@ changelog: vendor/bundle
8288
sidenav: vendor/bundle
8389
@node scripts/nav.js
8490

91+
# check internal links
92+
.PHONY: linkcheck-internal
93+
linkcheck-internal:
94+
@node scripts/checklinks-internal.js
95+
96+
# check external links
97+
.PHONY: linkcheck-external
98+
linkcheck-external:
99+
@node scripts/checklinks-external.js
100+
85101
.PHONY: zip-artifacts
86102
zip-artifacts:
87103
@tar czf build_package.tar.gz _site

package.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,16 @@
4747
"ajv": "^6.10.2",
4848
"algoliasearch": "^4.10.5",
4949
"ansi-regex": "^6.0.1",
50+
"browser-sync": "^2.27.7",
51+
"check-links": "^1.1.8",
5052
"clipboard": "^2.0.8",
5153
"dotenv": "^10.0.0",
5254
"glightbox": "^3.1.0",
55+
"globby": "11.0.4",
5356
"handlebars": "^4.7.7",
57+
"ora": "5.4.1",
58+
"posthtml": "^0.16.5",
59+
"posthtml-urls": "^1.0.0",
5460
"search-insights": "^2.0.3",
5561
"tap-spot": "^1.1.1",
5662
"tippy.js": "5.2.0",

scripts/catalog_papi.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ const getConnectionModes = (destination) => {
118118
}
119119
fs.mkdirSync(docsPath)
120120
fs.writeFileSync(`${docsPath}/index.md`, content)
121-
fs.appendFileSync('src/_data/catalog/incompleteDocs.txt', `${docsPath}\n`)
122121
}
123122
}
124123

@@ -167,6 +166,7 @@ const updateSources = async () => {
167166
const hiddenSources = [
168167
'amp',
169168
'factual-engine',
169+
'twilio-event-streams-beta'
170170
]
171171

172172
sources.forEach(source => {
@@ -197,6 +197,7 @@ const updateSources = async () => {
197197

198198
// create the catalog metadata
199199
let updatedSource = {
200+
id: source.id,
200201
display_name: source.name,
201202
slug,
202203
url,

scripts/checklinks-external.js

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
const globby = require('globby')
2+
const posthtml = require('posthtml')
3+
const fs = require('fs')
4+
const server = require('browser-sync').create()
5+
const checkLinks = require('check-links')
6+
const {
7+
$dataMetaSchema
8+
} = require('ajv')
9+
const ora = require('ora')
10+
11+
12+
13+
/**
 * Collects the URLs referenced by every built HTML page under `_site/`
 * and reports the ones that check-links classifies as dead.
 *
 * URLs that start with `http://0`, `/` or `https://github.com/segmentio`
 * are skipped; everything else found by posthtml-urls is checked.
 * The report itself is printed with console.log.
 *
 * Outcomes:
 *  - dead links found: prints the count and the list, then exits with status 1
 *  - no dead links: prints 'All links work!' and shuts the server down
 *  - unexpected error: logs it, marks the run as failed, shuts the server down
 *
 * @returns {Promise<void>}
 */
const checkForDeadExternalUrls = async () => {
  try {
    const files = await globby('_site/**/*.html')
    const throbber = ora('Link Check Starting').start()
    const urls = new Set()

    const ph = posthtml([
      require('posthtml-urls')({
        eachURL: (url) => {
          const isSkipped =
            url.startsWith('http://0') ||
            url.startsWith('/') ||
            url.startsWith('https://github.com/segmentio')
          if (!isSkipped) {
            urls.add(url)
          }
        },
      }),
    ])
    throbber.succeed()
    throbber.start('Processing files')

    // process() hands back a promise. Wait for every page so the URL set is
    // complete before checking, and so a failing page lands in the catch
    // below instead of becoming an unhandled rejection.
    await Promise.all(files.map((file) => ph.process(fs.readFileSync(file))))
    throbber.succeed()

    throbber.start('Starting server')
    await new Promise((resolve) => {
      server.init(
        {
          port: 3000,
          server: {
            baseDir: '_site',
          },
          open: false,
          logLevel: 'silent',
        },
        resolve,
      )
    })
    // Mark the phase done only after the init callback has fired.
    throbber.succeed()

    throbber.start('Checking the links')
    const candidates = Array.from(urls)
    const results = await checkLinks(candidates)
    const deadUrls = candidates.filter(
      (url) => results[url].status === 'dead',
    )

    if (deadUrls.length > 0) {
      throbber.fail(`Dead URLS: ${deadUrls.length}\n\n`)
      console.log(`Dead URLS: ${deadUrls.length}\n\n${deadUrls.join('\n')}`)
      process.exit(1)
    }

    console.log('All links work!')
    throbber.stop()
    server.exit()
  } catch (e) {
    console.error(e)
    // A crashed check must not look like a clean run to the caller.
    // NOTE(review): this relies on server.exit() not forcing its own exit
    // status - confirm against the browser-sync version in use.
    process.exitCode = 1
    server.exit()
  }
}
75+
checkForDeadExternalUrls()

scripts/checklinks-internal.js

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
const globby = require('globby')
2+
const posthtml = require('posthtml')
3+
const fs = require('fs')
4+
const server = require('browser-sync').create()
5+
const checkLinks = require('check-links')
6+
const ora = require('ora')
7+
8+
9+
/**
 * Checks every `/docs/...` link found in the built HTML pages under `_site/`
 * against a local server that serves `_site/` on port 3000.
 *
 * Each `/docs/` link is rewritten to `http://localhost:3000/` for checking.
 * Dead links are rewritten back to their `https://segment.com/docs` form
 * (with a trailing slash) for the report. Links that are known redirects
 * are then dropped: the three hard-coded entries below, plus every key of
 * `_site/redirects.json`. The report itself is printed with console.log.
 *
 * Outcomes:
 *  - dead links remain: prints the count and the list, then exits with status 1
 *  - none remain: prints 'All links work!' and shuts the server down
 *  - unexpected error: logs it, marks the run as failed, shuts the server down
 *
 * @returns {Promise<void>}
 */
const checkForDeadLocalUrls = async () => {
  try {
    const files = await globby('_site/**/*.html')
    const throbber = ora('Link Check Starting').start()
    const urls = new Set()

    const ph = posthtml([
      require('posthtml-urls')({
        eachURL: (url) => {
          if (url.startsWith('/docs/')) {
            urls.add(url.replace('/docs/', 'http://localhost:3000/'))
          }
        },
      }),
    ])
    throbber.succeed()
    throbber.start('Processing files')

    // process() hands back a promise. Wait for every page so the URL set is
    // complete before checking, and so a failing page lands in the catch
    // below instead of becoming an unhandled rejection.
    await Promise.all(files.map((file) => ph.process(fs.readFileSync(file))))
    throbber.succeed()

    throbber.start('Starting server')
    await new Promise((resolve) => {
      server.init(
        {
          port: 3000,
          server: {
            baseDir: '_site',
          },
          open: false,
          logLevel: 'silent',
        },
        resolve,
      )
    })
    // Mark the phase done only after the init callback has fired.
    throbber.succeed()

    throbber.start('Checking the links')
    const candidates = Array.from(urls)
    const results = await checkLinks(candidates)
    const deadUrls = candidates.filter(
      (url) => results[url].status === 'dead',
    )

    // Links that are expected to look dead locally because they are redirects.
    const redirects = new Set([
      'https://segment.com/docs/guides/usage-and-billing/',
      'https://segment.com/docs/connections/sources/catalog/libraries/website/plugins/',
      'https://segment.com/docs/assets/docs.bundle.js/',
    ])
    const data = require('../_site/redirects.json')
    Object.keys(data).forEach((key) => {
      const withSlash = key.endsWith('/') ? key : `${key}/`
      redirects.add(`https://segment.com/docs${withSlash.replace('/docs', '')}`)
    })

    // Translate the local URLs back to their public form and drop the redirects.
    const broke = deadUrls
      .map((url) => {
        const link = url.replace('http://localhost:3000', 'https://segment.com/docs')
        return link.endsWith('/') ? link : `${link}/`
      })
      .filter((link) => !redirects.has(link))

    if (broke.length > 0) {
      throbber.fail(`Dead URLS: ${broke.length}\n\n`)
      console.log(`Dead URLS: ${broke.length}\n\n${broke.join('\n')}`)
      process.exit(1)
    }

    console.log('All links work!')
    throbber.stop()
    server.exit()
  } catch (e) {
    console.error(e)
    // A crashed check must not look like a clean run to the caller.
    // NOTE(review): this relies on server.exit() not forcing its own exit
    // status - confirm against the browser-sync version in use.
    process.exitCode = 1
    server.exit()
  }
}
93+
94+
checkForDeadLocalUrls()
95+

src/_data/catalog/destination_categories.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# AUTOGENERATED FROM PUBLIC API. DO NOT EDIT
2-
# destination categories last updated 2021-12-14
2+
# destination categories last updated 2022-01-07
33
items:
44
- display_name: A/B Testing
55
slug: a-b-testing

0 commit comments

Comments
 (0)