|
1 | 1 | "use strict"; |
2 | 2 |
|
3 | | -var fs = require('fs'); |
4 | | -var readline = require('readline'); |
5 | | - |
6 | | -var GitHubApi = require('github'); |
7 | | -var _ = require('lodash'); |
8 | | -var parseXML = require('xml2js').parseString; |
9 | | -var async = require('async'); |
10 | | -var request = require('request'); |
11 | | -var unzip = require('unzip'); |
12 | | -var ProgressBar = require('progress'); |
13 | | - |
14 | | -var config = require('./config'); |
| 3 | +const { Octokit } = require('@octokit/rest'); |
| 4 | +const process = require('./process'); |
| 5 | +const axios = require('axios'); |
| 6 | +const unzip = require('unzipper'); |
| 7 | +const config = require('./config'); |
| 8 | + |
// Authenticated GitHub REST client; the token is read from the local config module.
const octokit = new Octokit({
  auth: config.github_token,
  userAgent: 'Labeller v2',
});

// Owner/repo pair that is spread into every GitHub request made in this file.
const httpse = {
  owner: config.github_user,
  repo: config.github_repo,
};

const ProgressBar = require('progress');

// Background process functions for logic flow below.
// NOTE: `process` here is the local ./process module required above; inside this
// file it shadows Node's global `process` object.
const Process = new process.Process(octokit, httpse);
21 | 22 |
|
22 | | - request.get('https://s3.amazonaws.com/alexa-static/top-1m.csv.zip') |
23 | | - .on('error', function(err) { |
24 | | - alexa_cb(err); |
25 | | - }) |
26 | | - .pipe(unzip.Parse()) |
27 | | - .on('entry', function (entry) { |
/**
 * @description Streams the Alexa top 1M CSV archive, collects the domains into an
 * array in file order (index 0 is the first row), then hands that array to
 * `get_prs` once the CSV entry has been read to the end. Failures are logged to
 * the console.
 * @returns {void}
 */
function initiate() {
  const readline = require('readline');

  const alexa = [];
  // Each row is `<rank>,<domain>`; capture group 1 is the domain.
  const regex = /^[0-9]+,(.+)/;
  const alexa_csv = 'https://s3.amazonaws.com/alexa-static/top-1m.csv.zip';

  // Grab Alexa data
  axios({
    method: 'get',
    url: alexa_csv,
    responseType: 'stream',
  })
    .then(function (response) {
      // An 'error' event with no listener is thrown by Node and would kill the run,
      // so both the download stream and the unzip stream get one.
      response.data.on('error', function (error) {
        console.log(error);
      });

      response.data
        .pipe(unzip.Parse())
        .on('error', function (error) {
          console.log(error);
        })
        .on('entry', function (entry) {
          const bar = new ProgressBar('Processing Alexa Top 1M [:bar] :percent :etas', {
            total: 100,
          });

          const lineReader = readline.createInterface({
            input: entry,
          });

          let x = 0;

          lineReader.on('line', function (line) {
            const matched = line.match(regex);
            // Blank or malformed rows yield `null`; skip them instead of throwing.
            if (matched !== null) {
              alexa.push(matched[1]);
            }
            // One tick per 10,000 rows, i.e. 100 ticks if the file holds ~1M rows.
            if (x % 10000 === 0) bar.tick();
            x++;
          });

          lineReader.on('close', function () {
            // Guards only against a synchronous throw; get_prs logs its own async failures.
            try {
              get_prs(alexa);
            } catch (error) {
              console.log(error);
            }
          });
        });
    })
    .catch(function (error) {
      console.log(error);
    });
}
65 | 72 |
|
66 | | -function github_process_prs(res, pr_cb){ |
67 | | - var alexa = res[0], |
68 | | - most_recent_pr_checked = res[1]; |
69 | | - |
70 | | - var github = new GitHubApi(); |
71 | | - var wildcard_www_regex = /^(www|\*)\.(.+)/ |
72 | | - |
73 | | - var httpse = { |
74 | | - user: config.github_user, |
75 | | - repo: config.github_repo |
76 | | - } |
77 | | - |
78 | | - github.authenticate({ |
79 | | - type: "oauth", |
80 | | - token: config.github_token || process.env.GITHUB_TOKEN |
/**
 * @param {string[]} alexa Domains collected from the Alexa CSV, in file order
 * @description Fetches every page of the repository's pull request listing and
 * passes the combined list, together with `alexa`, to `process_prs`. A failed
 * listing (or a synchronous throw from `process_prs`) is logged, not rethrown.
 * @returns {Promise<void>} Settles after the hand-off to `process_prs` or after the error is logged
 */
function get_prs(alexa) {
  return octokit
    .paginate('GET /repos/:owner/:repo/pulls', {
      ...httpse,
      // Ask for the largest page the endpoint serves so pagination needs fewer requests.
      per_page: 100,
    })
    .then((prs) => {
      process_prs(alexa, prs);
    })
    .catch((reason) => {
      console.log(reason);
    });
}
82 | 91 |
|
83 | | - // Label all PRs which meet the criteria for labelling |
84 | | - function github_process_pr_page(first_page){ |
85 | | - return function(err, pull_requests){ |
86 | | - if(first_page){ |
87 | | - fs.writeFile(config.state_file, pull_requests[0].number, function(err){ |
88 | | - if(err) return pr_cb(err); |
89 | | - }); |
90 | | - } |
91 | | - |
92 | | - _.each(pull_requests, function(pull_request){ |
93 | | - |
94 | | - if(pull_request.number > most_recent_pr_checked){ |
95 | | - github.pullRequests.getFiles(_.extend(httpse, { |
96 | | - number: pull_request.number |
97 | | - }), function(err, files){ |
98 | | - if(err) return pr_cb(err); |
99 | | - |
100 | | - // Rank a list of target hosts, returning the minimum alexa placing |
101 | | - function rank_targets(targets){ |
102 | | - var minimum_placing = 9999999; |
103 | | - |
104 | | - _.each(targets, function(host){ |
105 | | - if(host.match(wildcard_www_regex)){ |
106 | | - host = host.match(wildcard_www_regex)[2]; |
107 | | - } |
108 | | - |
109 | | - var alexa_placing = alexa.indexOf(host); |
110 | | - if(~alexa_placing && alexa_placing < minimum_placing){ |
111 | | - minimum_placing = alexa_placing; |
112 | | - } |
113 | | - }); |
114 | | - |
115 | | - if(minimum_placing != 9999999){ |
116 | | - return minimum_placing; |
117 | | - } |
118 | | - } |
119 | | - |
120 | | - // Given the url of an HTTPSE ruleset, return a list of targets to fetch_cb |
121 | | - function fetch_url_and_parse_targets(url, fetch_cb){ |
122 | | - request({url: url}, function(err, res, body){ |
123 | | - if(err) return fetch_cb(err); |
124 | | - |
125 | | - parseXML(body, function(err, root){ |
126 | | - if(err) return fetch_cb(err); |
127 | | - |
128 | | - fetch_cb(null, _.map(root.ruleset.target, function(target){ |
129 | | - return target.$.host; |
130 | | - })); |
131 | | - }); |
132 | | - }); |
133 | | - } |
134 | | - |
135 | | - var file_fetches = []; |
136 | | - |
137 | | - // Out of the list of files for this PR, figure out the minimum Alexa ranking for each |
138 | | - _.each(files, function(file){ |
139 | | - if(file.filename.match(/^src\/chrome\/content\/rules\//)){ |
140 | | - file_fetches.push(function(file_cb){ |
141 | | - fetch_url_and_parse_targets(file.raw_url, function(err, targets){ |
142 | | - if(err) return file_cb(err); |
143 | | - |
144 | | - console.log("Processing PR: " + pull_request.number + ", file: " + file.filename); |
145 | | - |
146 | | - var ranking = rank_targets(targets); |
147 | | - if(ranking){ |
148 | | - return file_cb(null, { |
149 | | - alexa_placing: ranking, |
150 | | - pr_number: pull_request.number |
151 | | - }); |
152 | | - } else { |
153 | | - return file_cb(); |
154 | | - } |
155 | | - }); |
156 | | - }); |
157 | | - } |
158 | | - }); |
159 | | - |
160 | | - async.parallel(file_fetches, function(err, res){ |
161 | | - if(err) pr_cb(err); |
162 | | - |
163 | | - var reduced_pr_ranking = _.reduce(_.filter(res), |
164 | | - function(minimum_file_res, file_res){ |
165 | | - if(file_res.alexa_placing < minimum_file_res.alexa_placing){ |
166 | | - return file_res; |
167 | | - } |
168 | | - return minimum_file_res; |
169 | | - }); |
170 | | - |
171 | | - if(reduced_pr_ranking){ |
172 | | - let label; |
173 | | - if(reduced_pr_ranking.alexa_placing < 100){ |
174 | | - label = "top-100"; |
175 | | - } else if(reduced_pr_ranking.alexa_placing < 1000){ |
176 | | - label = "top-1k"; |
177 | | - } else if(reduced_pr_ranking.alexa_placing < 10000){ |
178 | | - label = "top-10k"; |
179 | | - } else if(reduced_pr_ranking.alexa_placing < 100000){ |
180 | | - label = "top-100k"; |
181 | | - } else { |
182 | | - label = "top-1m"; |
183 | | - } |
184 | | - console.log("Applying label `" + label + "` to PR: " + reduced_pr_ranking.pr_number); |
185 | | - |
186 | | - github.issues.addLabels(_.extend(httpse, { |
187 | | - number: reduced_pr_ranking.pr_number, |
188 | | - body: [label] |
189 | | - }), function(err, res){ |
190 | | - if(err) console.log(err); |
191 | | - }); |
192 | | - } |
193 | | - }); |
194 | | - }); |
195 | | - } |
196 | | - }); |
197 | | - |
198 | | - if(github.hasNextPage(pull_requests)){ |
199 | | - github.getNextPage(pull_requests, github_process_pr_page(false)); |
/**
 * @param {string[]} alexa Domains collected from the Alexa CSV, in file order
 * @param {object[]} prs Pull request objects from the GitHub pulls listing
 * @description For every pull request, lists its changed files, asks
 * `Process.files` for a rank and, when the rank is not `null`, applies the label
 * chosen by `Process.return_label`. Per-PR failures are logged and do not stop
 * the remaining pull requests.
 */
function process_prs(alexa, prs) {
  // NOTE(review): this filtered list is never read; the loop below walks every PR.
  // It looks like the loop was meant to use it; confirm what `Process.labelled`
  // keeps before switching. The call itself is left in place unchanged.
  const filtered_prs = prs.filter(Process.labelled);

  prs.forEach((pr) => {
    octokit.pulls
      .listFiles({
        ...httpse,
        pull_number: pr.number,
      })
      .then((files) => {
        // NOTE(review): Octokit resolves with a response object whose `data` holds the
        // file list; presumably `Process.files` unwraps it — confirm.
        const rank_number = Process.files(files, alexa);
        if (rank_number === null) {
          return undefined;
        }

        const determined_label = Process.return_label(rank_number);
        // pr is interchangeable with issue in API ¯\_(ツ)_/¯
        // Returned so that, if add_label yields a promise, its rejection reaches the catch below.
        return Process.add_label(determined_label, pr.number);
      })
      .catch((error) => {
        // Without this handler a single failed request is an unhandled promise rejection.
        console.log(error);
      });
  });
}
209 | 117 |
|
210 | | -async.waterfall([ |
211 | | - get_alexa, |
212 | | - get_most_recent_pr, |
213 | | - github_process_prs |
214 | | -], function(err, result){ |
215 | | - if(err) console.log(err); |
216 | | -}); |
| 118 | +initiate(); |
0 commit comments