-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscraper.js
More file actions
80 lines (78 loc) · 3.16 KB
/
scraper.js
File metadata and controls
80 lines (78 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
const axios = require('axios').default;
const cheerio = require('cheerio');
const URL = 'https://www.jobs.id/lowongan-kerja/';
const jobDetail = require('./job_detail');
/**
 * Parses the salary text of a job card into a numeric range.
 *
 * The text is split on newlines. When both the second and the fourth line
 * exist they are read as the lower and upper bound: each is trimmed, every
 * "." is stripped (presumably thousands separators) and the rest is passed
 * to `Number`, so non-numeric text yields `NaN`. Otherwise the range stays
 * at 0/0 and `secret` reports whether the first line is exactly
 * 'Gaji Dirahasiakan'.
 *
 * @param {string} salary - Raw salary text taken from the listing.
 * @returns {{start: number, end: number, secret: boolean}} Parsed range and secrecy flag.
 */
function getSalary(salary){
    const [firstLine, lowerRaw, , upperRaw] = salary.split('\n');
    const toAmount = (raw) => Number(raw.trim().replace(/\./g, ''));

    // A range is only recognised when both bounds are present.
    if (lowerRaw !== undefined && upperRaw !== undefined) {
        return { start: toAmount(lowerRaw), end: toAmount(upperRaw), secret: false };
    }

    return { start: 0, end: 0, secret: firstLine === 'Gaji Dirahasiakan' };
}
/**
 * Reads the summary fields of a single job card from the listing page.
 *
 * @param {Function} $ - Cheerio instance loaded with the listing HTML.
 * @param {*} node - One element matched by `.single-job-ads`.
 * @returns {?{link: string, title: string, company: string, location: string, salary: Object}}
 *   The summary, or `null` when the card has no `h3 a[href]`.
 */
function parseListing($, node) {
    const el = $(node);
    const heading = el.find('h3');
    const href = heading.find('a').attr('href');
    // `attr` yields undefined when the anchor or attribute is missing; calling
    // `.trim()` on it used to throw and abort the whole scrape. Skip the card.
    if (href === undefined) {
        return null;
    }
    const company = el.find('p > a[class="bold"]').eq(0).text().trim();
    return {
        link: href.trim(),
        title: heading.eq(0).text().trim(),
        // Cards without a company name get a fixed placeholder.
        company: company === '' ? 'Perusahaan Dirahasiakan' : company,
        location: el.find('p > span').eq(0).text().trim(),
        salary: getSalary(el.find('p').eq(1).text().trim()),
    };
}

/**
 * Merges a listing summary with the result of `jobDetail` into the flat
 * record handed to the caller.
 *
 * @param {Object} basic - Summary produced by `parseListing`.
 * @param {Object} detail - Value resolved by `jobDetail(basic.link)`.
 * @returns {Object} The combined job record.
 */
function toJobRecord(basic, detail) {
    return {
        origin_post: basic.link,
        title: basic.title,
        category: detail.category,
        location: basic.location,
        salary: basic.salary,
        description: detail.description,
        requirement: detail.requirement,
        posted_at: detail.posted_at,
        deadline: detail.deadline,
        apply_linkedin: detail.apply_linkedin,
        apply_google: detail.apply_google,
        company: {
            company: basic.company,
            industry: detail.industry,
            logo: detail.logo,
            about: detail.about_company,
            size: detail.size_company,
            address: detail.office_address,
        },
    };
}

/**
 * Downloads the listing page at `URL`, reads up to `request` job cards,
 * fetches the detail of each one through `jobDetail` and passes the combined
 * records to `callback`.
 *
 * Any failure (network, parsing, a rejected `jobDetail`, or an exception
 * thrown by `callback`) is logged with `console.error`; in that case
 * `callback` is not invoked with a result.
 *
 * @param {Function} callback - Called once with the array of job records.
 * @param {number} [request=25] - Maximum number of listing cards to read.
 * @returns {Promise<Array<Object>|undefined>} Resolves with the records after
 *   `callback` ran, or with `undefined` when an error was logged. Never rejects.
 */
const scraper = (callback,request=25) => {
    return axios.get(URL)
        .then(({ data: html }) => {
            const $ = cheerio.load(html);
            const jobs = $('body').find('.single-job-ads');
            const listings = [];
            for (let index = 0; index < request && index < jobs.length; index++) {
                const listing = parseListing($, jobs[index]);
                if (listing !== null) {
                    listings.push(listing);
                }
            }
            // Detail pages are fetched in parallel; one rejection fails the batch.
            return Promise.all(
                listings.map((basic) => jobDetail(basic.link).then((detail) => toJobRecord(basic, detail)))
            );
        })
        .then((records) => {
            callback(records);
            return records;
        })
        .catch((err) => {
            console.error(err);
        });
};
// The module's only export is the `scraper(callback, request)` function.
module.exports = scraper;
// Manual smoke test: uncomment to read at most one listing and print the result.
// scraper(console.log,1)