forked from clearlydefined/crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpackagistFetch.js
More file actions
104 lines (88 loc) · 3.66 KB
/
packagistFetch.js
File metadata and controls
104 lines (88 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
// Copyright (c) Microsoft Corporation and others. Licensed under the MIT license.
// SPDX-License-Identifier: MIT
const AbstractFetch = require('./abstractFetch')
const requestRetry = require('requestretry').defaults({ maxAttempts: 3, fullResponse: true })
const fs = require('fs')
const { get } = require('lodash')
const { getStream } = require('../../lib/fetch')
const { promisify } = require('util')
const readdir = promisify(fs.readdir)
const FetchResult = require('../../lib/fetchResult')
// Base URL of each registry this fetcher downloads from, keyed by provider name.
const providerMap = { packagist: 'https://repo.packagist.org/' }
/**
 * Fetcher for components whose spec provider is 'packagist'.
 *
 * For a request it looks up the package metadata on the Packagist registry,
 * downloads the distribution archive named by the manifest's `dist.url`,
 * unpacks it into a temp directory and attaches a FetchResult describing
 * the unpacked content to the request.
 */
class PackagistFetch extends AbstractFetch {
  /**
   * Report whether this fetcher is responsible for the given request.
   *
   * @param {object} request - crawler request; turned into a spec via `toSpec`
   * @returns {boolean} truthy only when the spec's provider is 'packagist'
   */
  canHandle(request) {
    const spec = this.toSpec(request)
    return spec && spec.provider === 'packagist'
  }

  /**
   * Fetch the package described by the request and attach the result to it.
   *
   * The request is marked as skipped (and nothing is downloaded) when the
   * registry has no manifest for the requested revision, or when that
   * manifest does not name a download URL.
   *
   * @param {object} request - crawler request to process
   * @returns {Promise<object>} the request, with `fetchResult` set on success
   */
  async handle(request) {
    const spec = this.toSpec(request)
    const registryData = await this._getRegistryData(spec)
    if (!registryData || !registryData.manifest) return this.markSkip(request)
    // Without a download URL there is nothing to fetch. Skip before any temp
    // file/dir is created; otherwise an empty temp file would be handed to
    // decompress/computeHashes below.
    if (!get(registryData, 'manifest.dist.url')) return request.markSkip('Missing dist.url ')
    super.handle(request)
    const file = this.createTempFile(request)
    await this._getPackage(request, registryData, file.name)
    const dir = this.createTempDir(request)
    await this.decompress(file.name, dir.name)
    const hashes = await this.computeHashes(file.name)
    const fetchResult = new FetchResult(request.url)
    fetchResult.document = this._createDocument(dir, registryData, hashes)
    fetchResult.document.dirRoot = await this._getDirRoot(dir.name)
    request.fetchResult = fetchResult.adoptCleanup(dir, request)
    return request
  }

  /**
   * Load the registry metadata for a package and resolve the manifest of the
   * requested revision.
   *
   * The response body is reused as the result: `manifest` and `releaseDate`
   * are added to it and the (potentially large) `packages` listing is removed.
   *
   * @param {object} spec - component spec providing `namespace`, `name` and `revision`
   * @returns {Promise<object|null>} the registry data, or null when the request
   *   did not answer 200 with a body, or the revision is not listed
   */
  async _getRegistryData(spec) {
    const baseUrl = providerMap.packagist
    const packageName = `${spec.namespace}/${spec.name}`
    const { body, statusCode } = await requestRetry.get(`${baseUrl}p2/${packageName}.json`, {
      json: true
    })
    if (statusCode !== 200 || !body) return null
    const registryData = body
    // Get the array of versions for this package. An array path is used so a
    // body without `packages` yields undefined instead of throwing, and so the
    // package name is treated as a single key even if it contains dots.
    const packageVersions = get(registryData, ['packages', packageName])
    registryData.manifest = this._extractManifest(packageVersions, spec)
    if (!registryData.manifest) return null
    registryData.releaseDate = get(registryData, 'manifest.time')
    delete registryData['packages']
    return registryData
  }

  /**
   * Build the full manifest of the requested revision from the version list.
   *
   * Every entry from the start of the list up to and including the requested
   * version is overlaid in order: a later entry's keys replace earlier ones and
   * the marker value '__unset' removes a key. (This looks like the expansion of
   * Composer's minified metadata format - confirm against the Packagist docs.)
   *
   * @param {Array<object>} packageVersions - version entries in registry order
   * @param {object} spec - component spec; `revision` is matched as-is and with a 'v' prefix
   * @returns {object|null} the combined manifest, or null when the list is not
   *   an array or the revision is not present
   */
  _extractManifest(packageVersions, spec) {
    if (!Array.isArray(packageVersions)) return null
    // Find the specific version in the array - handle both 'v1.0.0' and '1.0.0' formats
    const targetVersion = spec.revision
    const targetVersionWithV = `v${spec.revision}`
    const targetIndex = packageVersions.findIndex(
      versionObj => versionObj.version === targetVersion || versionObj.version === targetVersionWithV
    )
    if (targetIndex === -1) return null
    const combined = {}
    for (const versionObj of packageVersions.slice(0, targetIndex + 1)) {
      for (const [key, value] of Object.entries(versionObj)) {
        if (value === '__unset') {
          delete combined[key]
        } else {
          combined[key] = value
        }
      }
    }
    return combined
  }

  /**
   * Download the distribution archive named by the manifest to a local file.
   *
   * @param {object} request - crawler request; marked as skipped when no URL is available
   * @param {object} registryData - registry data holding `manifest.dist.url`
   * @param {string} destination - path of the file to write the archive to
   * @returns {Promise<*>} result of `request.markSkip` when there is no URL, otherwise undefined
   * @throws {Error} when the download does not answer with status 200, or when
   *   reading the download or writing the file fails
   */
  async _getPackage(request, registryData, destination) {
    const distUrl = get(registryData, 'manifest.dist.url')
    if (!distUrl) return request.markSkip('Missing dist.url ')
    const response = await getStream({ url: distUrl })
    if (response.statusCode !== 200) throw new Error(`${response.statusCode} ${response.message}`)
    await new Promise((resolve, reject) => {
      const output = fs.createWriteStream(destination)
      output.on('finish', () => resolve(null))
      // Surface failures instead of leaving the promise pending forever (or
      // crashing the process through an unhandled 'error' event).
      output.on('error', reject)
      response.data.on('error', error => {
        // pipe() does not close the destination when the source fails.
        output.destroy()
        reject(error)
      })
      response.data.pipe(output)
    })
  }

  /**
   * Return the first directory entry found at the given location.
   *
   * @param {string} location - directory to list
   * @returns {Promise<string|undefined>} name of the first entry, undefined when the directory is empty
   */
  async _getDirRoot(location) {
    return (await readdir(location))[0]
  }

  /**
   * Assemble the document stored on the fetch result.
   *
   * @param {object} dir - temp directory handle; its `name` becomes `location`
   * @param {object} registryData - registry data, also the source of `releaseDate`
   * @param {object} hashes - hashes computed for the downloaded archive
   * @returns {object} `{ location, registryData, releaseDate, hashes }`
   */
  _createDocument(dir, registryData, hashes) {
    const releaseDate = registryData.releaseDate
    return { location: dir.name, registryData, releaseDate, hashes }
  }
}
module.exports = options => new PackagistFetch(options)