Skip to content

Commit 0b931ff

Browse files
authored
Merge branch 'master' into dependabot/npm_and_yarn/micromatch-4.0.8
2 parents 806010b + 6b7bee1 commit 0b931ff

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1787
-464
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# This workflow will build a docker image, push it to ghcr.io, and deploy it to an Azure WebApp.
2+
name: Build and Deploy -- DEV
3+
4+
on:
5+
workflow_dispatch:
6+
push:
7+
branches: [master]
8+
9+
jobs:
10+
upload-package-lock-json:
11+
name: Upload package-lock.json from this repo
12+
runs-on: ubuntu-latest
13+
steps:
14+
- name: Checkout
15+
uses: actions/checkout@v4.1.1
16+
17+
- name: Upload package-lock.json
18+
uses: actions/upload-artifact@v4
19+
with:
20+
name: package-lock.json
21+
path: package-lock.json
22+
23+
build-and-deploy:
24+
name: Build and Deploy
25+
needs: upload-package-lock-json
26+
uses: clearlydefined/operations/.github/workflows/app-build-and-deploy.yml@v3.2.0
27+
secrets:
28+
AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
29+
AZURE_WEBAPP_PUBLISH_PROFILE: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_DEV }}
30+
DEPLOY_TOKEN: ${{ secrets.DEPLOY_TOKEN }}
31+
PRODUCTION_DEPLOYERS: ${{ secrets.PRODUCTION_DEPLOYERS }}
32+
with:
33+
deploy-env: dev
34+
application-type: worker
35+
azure-app-base-name: cdcrawler
36+
azure-app-name-postfix: -dev
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# This workflow will build a docker image, push it to ghcr.io, and deploy it to an Azure WebApp.
2+
name: Build and Deploy -- PROD
3+
4+
on:
5+
workflow_dispatch:
6+
release:
7+
types: [published]
8+
9+
jobs:
10+
upload-package-lock-json:
11+
name: Upload package-lock.json from this repo
12+
runs-on: ubuntu-latest
13+
steps:
14+
- name: Checkout
15+
uses: actions/checkout@v4.1.1
16+
17+
- name: Upload package-lock.json
18+
uses: actions/upload-artifact@v4
19+
with:
20+
name: package-lock.json
21+
path: package-lock.json
22+
23+
build-and-deploy-prod:
24+
needs: upload-package-lock-json
25+
uses: clearlydefined/operations/.github/workflows/app-build-and-deploy.yml@v3.2.0
26+
secrets:
27+
AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
28+
AZURE_WEBAPP_PUBLISH_PROFILE: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE_PROD }}
29+
DEPLOY_TOKEN: ${{ secrets.DEPLOY_TOKEN }}
30+
PRODUCTION_DEPLOYERS: ${{ secrets.PRODUCTION_DEPLOYERS }}
31+
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
32+
with:
33+
deploy-env: prod
34+
application-type: worker
35+
azure-app-base-name: cdcrawler
36+
azure-app-name-postfix: -prod
37+
docker-hub-username: ${{ vars.DOCKERHUB_USERNAME }}

DevDockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,13 @@
44
FROM node:18-bullseye
55
ENV APPDIR=/opt/service
66

7+
# Set environment variables from build arguments
78
ARG BUILD_NUMBER=0
8-
ENV CRAWLER_BUILD_NUMBER=$BUILD_NUMBER
9+
# Use the BUILD_NUMBER build argument declared above. APP_VERSION has no ARG declaration yet at this point in the file, so referencing it here would expand to an empty string.
ENV BUILD_NUMBER=$BUILD_NUMBER
10+
ARG APP_VERSION="UNKNOWN"
11+
ENV APP_VERSION=$APP_VERSION
12+
ARG BUILD_SHA="UNKNOWN"
13+
ENV BUILD_SHA=$BUILD_SHA
914

1015
# Ruby and Python Dependencies
1116
RUN apt-get update && apt-get install -y --no-install-recommends --no-install-suggests curl bzip2 build-essential libssl-dev libreadline-dev zlib1g-dev cmake python3 python3-dev python3-pip xz-utils libxml2-dev libxslt1-dev libpopt0 && \

Dockerfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
FROM node:18-bullseye
55
ENV APPDIR=/opt/service
66

7-
ARG BUILD_NUMBER=0
8-
ENV CRAWLER_BUILD_NUMBER=$BUILD_NUMBER
7+
# Set environment variables from build arguments
8+
ARG APP_VERSION="UNKNOWN"
9+
ENV APP_VERSION=$APP_VERSION
10+
ARG BUILD_SHA="UNKNOWN"
11+
ENV BUILD_SHA=$BUILD_SHA
912

1013
# Ruby and Python Dependencies
1114
RUN apt-get update && apt-get install -y --no-install-recommends --no-install-suggests curl bzip2 build-essential libssl-dev libreadline-dev zlib1g-dev cmake python3 python3-dev python3-pip xz-utils libxml2-dev libxslt1-dev libpopt0 && \

config/cdConfig.js

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@ const config = require('painless-config')
55

66
const cd_azblob = {
77
connection: config.get('CRAWLER_AZBLOB_CONNECTION_STRING'),
8-
container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME')
8+
container: config.get('CRAWLER_AZBLOB_CONTAINER_NAME'),
9+
account: config.get('CRAWLER_AZBLOB_ACCOUNT_NAME'),
10+
spnAuth: config.get('CRAWLER_AZBLOB_SPN_AUTH'),
11+
isSpnAuth: config.get('CRAWLER_AZBLOB_IS_SPN_AUTH') || false
912
}
1013

1114
const githubToken = config.get('CRAWLER_GITHUB_TOKEN')
@@ -95,10 +98,11 @@ module.exports = {
9598
// '--quiet'
9699
],
97100
timeout: 1000,
98-
processes: 2,
101+
processes: Number(config.get('CRAWLER_SCANCODE_PARALLELISM') || process.env.CRAWLER_SCANCODE_PARALLELISM) || 2,
99102
format: '--json-pp'
100103
},
101104
source: {},
105+
sourcearchive: {},
102106
top: { githubToken }
103107
},
104108
store: {
@@ -110,7 +114,10 @@ module.exports = {
110114
},
111115
azqueue: {
112116
connectionString: cd_azblob.connection,
113-
queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests'
117+
account: cd_azblob.account,
118+
queueName: config.get('CRAWLER_HARVESTS_QUEUE_NAME') || 'harvests',
119+
spnAuth: config.get('CRAWLER_HARVESTS_QUEUE_SPN_AUTH'),
120+
isSpnAuth: config.get('CRAWLER_HARVESTS_QUEUE_IS_SPN_AUTH') || false
114121
},
115122
'cd(azblob)': cd_azblob,
116123
'cd(file)': cd_file
@@ -134,7 +141,12 @@ module.exports = {
134141
maxDequeueCount: 5,
135142
attenuation: {
136143
ttl: 3000
137-
}
138-
}
144+
},
145+
spnAuth: config.get('CRAWLER_QUEUE_AZURE_SPN_AUTH') || cd_azblob.spnAuth,
146+
account: config.get('CRAWLER_QUEUE_AZURE_ACCOUNT_NAME') || cd_azblob.account,
147+
isSpnAuth: config.get('CRAWLER_QUEUE_AZURE_IS_SPN_AUTH') || false
148+
},
149+
appVersion: config.get('APP_VERSION'),
150+
buildsha: config.get('BUILD_SHA')
139151
}
140152
}

ghcrawler/app.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ function configureApp(service, logger) {
2222
app.use('/requests', require('./routes/requests')(service))
2323

2424
// to keep AlwaysOn from flooding logs with errors
25-
app.get('/', (request, response) => {
26-
response.helpers.send.noContent()
27-
})
25+
app.use('/', require('./routes/index')(config.get('BUILD_SHA'), config.get('APP_VERSION')))
2826

2927
// Catch 404 and forward to error handler
3028
const requestHandler = (request, response, next) => {

ghcrawler/lib/crawler.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -638,7 +638,7 @@ class Crawler {
638638
metadata.errorMessage = request._error.message
639639
metadata.errorStack = request._error.stack
640640
}
641-
metadata.version = 1
641+
metadata.version = '1'
642642
metadata.meta = request.meta
643643
metadata.type = 'deadletter'
644644
metadata.url = request.url.replace('//', '//deadletter.')

ghcrawler/lib/traversalPolicy.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ class TraversalPolicy {
143143
return new TraversalPolicy('storageOnly', 'always', TraversalPolicy._resolveMapSpec(map))
144144
}
145145

146+
/**
 * Create a policy whose fetch setting is 'mutables' and whose freshness is 'always'.
 * @param map map spec; it is passed through TraversalPolicy._resolveMapSpec before being handed to the constructor
 * @returns {TraversalPolicy} a newly constructed policy
 */
static reharvestAlways(map) {
147+
return new TraversalPolicy('mutables', 'always', TraversalPolicy._resolveMapSpec(map))
148+
}
149+
146150
static clone(policy) {
147151
return new TraversalPolicy(policy.fetch, policy.freshness, policy.map)
148152
}

0 commit comments

Comments
 (0)