Skip to content

Commit 6a55052

Browse files
authored
Merge branch 'master' into qt/fix_pod_latest_version
2 parents 0817188 + ae32c98 commit 6a55052

22 files changed

+1357
-202
lines changed

.github/workflows/test.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
name: Run Docker build and tests
22

33
on:
4+
workflow_dispatch:
45
push:
56
branches:
67
- master
8+
- prod
79
pull_request:
810
branches:
911
- master
12+
- prod
1013

1114
permissions:
1215
contents: read

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,17 @@ See `local.env.list`, `dev.env.list` and `prod.env.list` template files.
147147
- Allowed resource types: Container, Object
148148
- Allowed permissions: Read, Write, List, Add, Process
149149

150+
### Running Crawler Queues in a separate Azure account
151+
152+
Crawler can be run with the queues in a different Azure account from the storage
153+
blobs. This is useful where you are running the crawler and submitting results
154+
to `clearlydefinedprod` _but_ you don't want to have queues in the same Azure
155+
account, since anyone with access to `clearlydefinedprod` could gain access to your
156+
queues.
157+
158+
Set env var `CRAWLER_QUEUE_AZURE_CONNECTION_STRING` with a connection string
159+
obtained from Azure.
160+
150161
## Build and run Docker image locally
151162

152163
`docker build --platform linux/amd64 -t cdcrawler:latest .`

config/cdConfig.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ module.exports = {
127127
},
128128
storageQueue: {
129129
weights: { immediate: 3, soon: 2, normal: 3, later: 2 },
130-
connectionString: cd_azblob.connection,
130+
connectionString: config.get('CRAWLER_QUEUE_AZURE_CONNECTION_STRING') || cd_azblob.connection,
131131
queueName: config.get('CRAWLER_QUEUE_PREFIX') || 'cdcrawlerdev',
132132
visibilityTimeout: 8 * 60 * 60, // 8 hours
133133
visibilityTimeout_remainLocal: fetchedCacheTtlSeconds,

lib/fetch.js

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// (c) Copyright 2024, SAP SE and ClearlyDefined contributors. Licensed under the MIT license.
2+
// SPDX-License-Identifier: MIT
3+
4+
const axios = require('axios')
5+
6+
// Translate a request-promise style request object into an axios options
// object. Supported request fields: method, url/uri, headers, body,
// json (JSON response), encoding === null (stream response), and
// simple === false (resolve instead of reject on non-2xx status).
function buildRequestOptions(request) {
  // json takes precedence over encoding when picking the response type.
  const responseType = request.json ? 'json' : request.encoding === null ? 'stream' : 'text'
  const options = {
    method: request.method,
    url: request.url || request.uri,
    responseType,
    headers: request.headers,
    data: request.body
  }
  // simple === false mirrors request-promise: accept any HTTP status.
  if (request.simple === false) options.validateStatus = () => true
  return options
}

// Perform an HTTP request described by a request-promise style object.
// Resolves with the response body by default, or with the full axios
// response (augmented with request-promise's statusCode/statusMessage
// aliases) when request.resolveWithFullResponse is set. On failure the
// axios error is rethrown with statusCode copied from the response, when
// a response exists.
async function callFetch(request, axiosInstance = axios) {
  try {
    const response = await axiosInstance(buildRequestOptions(request))
    if (request.resolveWithFullResponse) {
      // Alias axios naming to the request-promise names callers expect.
      response.statusCode = response.status
      response.statusMessage = response.statusText
      return response
    }
    return response.data
  } catch (error) {
    error.statusCode = error.response?.status
    throw error
  }
}
42+
43+
// Build a fetch function bound to a dedicated axios instance created with
// the given defaults (e.g. baseURL, headers, timeout). The returned
// function accepts the same request-promise style objects as callFetch.
function withDefaults(opts) {
  const configuredAxios = axios.create(opts)
  return function (request) {
    return callFetch(request, configuredAxios)
  }
}
47+
48+
// Public API: a direct fetch helper and a factory for pre-configured fetchers.
module.exports = { callFetch, withDefaults }

0 commit comments

Comments
 (0)