Skip to content

Commit 640f2e8

Browse files
committed
Add type, hl, gl search params
1 parent d531a82 commit 640f2e8

File tree

3 files changed

+111
-8
lines changed

3 files changed

+111
-8
lines changed

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,21 @@ The url query options are as follows:
2121
<td>String</td>
2222
<td>YouTube search query</td>
2323
</tr>
24-
<tr>
24+
<tr>
25+
<td>type</td>
26+
<td>String</td>
27+
<td>(Optional) Item type (channel, playlist, video)</td>
28+
</tr>
29+
<tr>
30+
<td>hl</td>
31+
<td>String</td>
32+
<td>(Optional) Language (e.g. en)</td>
33+
</tr>
34+
<tr>
35+
<td>gl</td>
36+
<td>String</td>
37+
<td>(Optional) Region (e.g. US)</td>
38+
</tr>
<tr>
2539
<td>pageToken</td>
2640
<td>String</td>
2741
<td>(Optional) The token for page of YouTube search results. Returned by initial call</td>

scraper.js

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,80 @@
11
const request = require('request');
2+
const URL = require('url');
23

3-
async function youtube(query, key, pageToken) {
4+
/**
 * Fetch the search filters YouTube offers for a query.
 *
 * Requests the HTML results page with `hl=en` (presumably so that group
 * titles such as "Type" come back in English -- confirm), extracts the
 * embedded `ytInitialData` JSON and converts the filter sub-menu into a Map.
 *
 * @param {string} query - Search terms; URL-encoded before being sent.
 * @returns {Promise<Map<string, Array<Object>>>} Map keyed by filter group
 *   title. Each value is an array of filter objects
 *   (`description`, `label`, `query`, `isSet`, `active`, `name`, `ref`);
 *   the array additionally carries an `active` property pointing at the
 *   filter that is currently set (or `null`).
 *   The promise rejects when the request fails, the response status is not
 *   200, or the page cannot be parsed, so callers never wait forever.
 */
async function getFilters(query) {
    return new Promise((resolve, reject) => {
        const url = `https://www.youtube.com/results?search_query=${encodeURIComponent(query)}&hl=en`;
        request(url, (error, response, html) => {
            // Settle the promise on every failure path; otherwise an awaiting
            // caller would hang indefinitely.
            if (error) {
                reject(error);
                return;
            }
            if (response.statusCode !== 200) {
                reject(new Error(`YouTube filter request failed with status ${response.statusCode}`));
                return;
            }

            // Get script json data from html to parse
            try {
                let match = html.match(/ytInitialData[^{]*(.*"adSafetyReason":[^;]*});/s);
                if (!match) {
                    match = html.match(/ytInitialData"[^{]*(.*);\s*window\["ytInitialPlayerResponse"\]/s);
                }
                if (!match) {
                    throw new Error('Unable to locate ytInitialData in the YouTube response');
                }
                const json = JSON.parse(match[1]);

                // The following adapted from:
                // https://github.com/TimeForANinja/node-ytsr/blob/wip-api-adjustments/lib/utils.js
                const BASE_URL = 'https://www.youtube.com/';
                const wrapper = json.contents.twoColumnSearchResultsRenderer.primaryContents.sectionListRenderer;
                const filterWrapper = wrapper.subMenu.searchSubMenuRenderer.groups;
                const parsedGroups = new Map();
                for (const filterGroup of filterWrapper) {
                    // TODO: switch to Map when done caring about compatibility
                    const singleFilterGroup = [];
                    singleFilterGroup.active = null;
                    for (const filter of filterGroup.searchFilterGroupRenderer.filters) {
                        // A filter without a navigation endpoint is the one already applied.
                        const isSet = !filter.searchFilterRenderer.navigationEndpoint;
                        let targetURL = null;
                        if (!isSet) {
                            targetURL = filter.searchFilterRenderer.navigationEndpoint.commandMetadata.webCommandMetadata.url;
                        }
                        const parsedFilter = {
                            description: filter.searchFilterRenderer.tooltip,
                            label: parseText(filter.searchFilterRenderer.label),
                            query: isSet ? null : URL.resolve(BASE_URL, targetURL),
                            isSet: isSet,
                            // TODO: remove when done caring about compatibility
                            active: isSet,
                            name: parseText(filter.searchFilterRenderer.label),
                            ref: isSet ? null : URL.resolve(BASE_URL, targetURL),
                        };
                        if (isSet) singleFilterGroup.active = parsedFilter;
                        singleFilterGroup.push(parsedFilter);
                    }
                    parsedGroups.set(parseText(filterGroup.searchFilterGroupRenderer.title), singleFilterGroup);
                }
                resolve(parsedGroups);
            }
            catch (ex) {
                console.error("Failed to parse data:", ex);
                reject(ex);
            }
        });
    });
}
55+
/**
 * Flatten a YouTube text object into a plain string.
 *
 * @param {Object} txt - Object carrying either a `simpleText` string or a
 *   `runs` array whose entries each have a `text` property.
 * @returns {string} `simpleText` when it is truthy; otherwise the `text`
 *   of every run concatenated in order.
 */
function parseText(txt) {
    const { simpleText } = txt;
    if (simpleText) {
        return simpleText;
    }
    // Fall back to stitching the individual runs together.
    const pieces = txt.runs.map((run) => run.text);
    return pieces.join('');
}
58+
59+
async function youtube(params, key, pageToken) {
60+
let query, type, hl, gl;
61+
if (typeof params === 'string') {
62+
query = params;
63+
}
64+
else {
65+
query = params.query;
66+
type = params.type;
67+
hl = params.hl;
68+
gl = params.gl;
69+
}
70+
let filterUrl = null;
71+
if (type && !key && !pageToken) {
72+
let filters = await getFilters(query);
73+
let typeFilter = filters.get('Type').find( filter => filter.name.toLowerCase() === type.toLowerCase() );
74+
if (typeFilter && typeFilter.query) {
75+
filterUrl = typeFilter.query;
76+
}
77+
}
478
return new Promise((resolve, reject) => {
579
let json = { results: [], version: require('./package.json').version };
680

@@ -10,13 +84,16 @@ async function youtube(query, key, pageToken) {
1084
json["key"] = key;
1185

1286
// Access YouTube search API
87+
let clientParams = {
88+
clientName: "WEB",
89+
clientVersion: "2.20201022.01.01",
90+
};
91+
if (hl) clientParams.hl = hl;
92+
if (gl) clientParams.gl = gl;
1393
request.post(`https://www.youtube.com/youtubei/v1/search?key=${key}`, {
1494
json: {
1595
context: {
16-
client: {
17-
clientName: "WEB",
18-
clientVersion: "2.20201022.01.01",
19-
},
96+
client: clientParams,
2097
},
2198
continuation: pageToken
2299
},
@@ -29,7 +106,13 @@ async function youtube(query, key, pageToken) {
29106
});
30107
}
31108
else {
32-
let url = `https://www.youtube.com/results?q=${encodeURIComponent(query)}`;
109+
let url = filterUrl ? filterUrl : `https://www.youtube.com/results?q=${encodeURIComponent(query)}`;
110+
if (hl) {
111+
url += `&hl=${encodeURIComponent(hl)}`;
112+
}
113+
if (gl) {
114+
url += `&gl=${encodeURIComponent(gl)}`;
115+
}
33116

34117
// Access YouTube search
35118
request(url, (error, response, html) => {

server.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@ app.get('/', (req, res) => {
99

1010
//API route
1111
app.get('/api/search', (req, res) => {
12-
scraper.youtube(req.query.q, req.query.key, req.query.pageToken)
12+
const params = {
13+
query: req.query.q,
14+
type: req.query.type,
15+
hl: req.query.hl,
16+
gl: req.query.gl
17+
}
18+
scraper.youtube(params, req.query.key, req.query.pageToken)
1319
.then(x => res.json(x))
1420
.catch(e => res.send(e));
1521
});

0 commit comments

Comments
 (0)