Skip to content

Commit 9072a7c

Browse files
authored
Added ability to download from a search query (#119)
2 parents 9749674 + 21bf20d commit 9072a7c

File tree

8 files changed

+160
-18
lines changed

8 files changed

+160
-18
lines changed

.DS_Store

-2 KB
Binary file not shown.

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ jobs:
2222
run: npm ci
2323

2424
- name: Run tests
25-
run: npm test
25+
run: npm run test:unit

.gitignore

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,3 @@ testing_folder
135135
# config
136136
user_config.json
137137
reddit_post_downloader.code-workspace
138-
139-
.DS_Store
140-
.DS_Store

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ Contribute @ https://github.com/josephrcox/easy-reddit-downloader
9797
```
9898
Note: at this time, comments from a specific user are not supported. If you would like this, please submit an issue with the `enhancement` tag.
9999

100+
## Downloading from a search query
101+
102+
To download search results, enter your search query in this format when asked what subreddits you would like to download: `search:subreddit:aww (cats AND dogs) OR birds`.
103+
The search query can contain any of the search modifiers shown on [this webpage](https://support.reddithelp.com/hc/en-us/articles/19696541895316-Available-search-features) and should appear exactly as it would in the Reddit search bar, but with `search:` prepended to the query.
104+
100105
## Downloading from a post list (download specific URLs)
101106
With version v0.2.1, I added the ability to download specific posts from a list of URLs.
102107

__tests__/e2e.test.js

Lines changed: 123 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const {
1717
getFileName,
1818
getMediaDownloadInfo,
1919
buildRedditApiUrl,
20+
extractName,
2021
DEFAULT_REQUEST_TIMEOUT,
2122
} = require('../lib/utils');
2223

@@ -125,9 +126,13 @@ describe('Reddit API Integration', () => {
125126
});
126127

127128
test('handles non-existent subreddit gracefully', async () => {
128-
await expect(
129-
fetchRedditPosts('thisdoesnotexist123456789xyz', 1),
130-
).rejects.toThrow();
129+
try {
130+
await fetchRedditPosts('thisdoesnotexist123456789xyz', 1);
131+
expect(true).toBe(false);
132+
} catch (err) {
133+
expect(err).toBeDefined();
134+
expect(err.message || err.response?.status).toBeDefined();
135+
}
131136
});
132137
});
133138

@@ -338,6 +343,105 @@ describe('User Profile Downloads', () => {
338343
});
339344
});
340345

346+
describe('Search Query Downloads', () => {
347+
test('can fetch posts from a search query', async () => {
348+
const query = 'search:subreddit:aww cats AND cute';
349+
const url = buildRedditApiUrl({
350+
target: extractName(query),
351+
isUser: false,
352+
isSearch: true,
353+
sorting: 'new',
354+
time: 'all',
355+
limit: 5,
356+
});
357+
358+
const response = await axios.get(url, {
359+
timeout: DEFAULT_REQUEST_TIMEOUT,
360+
headers: {
361+
'User-Agent': 'RedditDownloaderTest/1.0',
362+
},
363+
});
364+
365+
const posts = response.data.data.children.map((child) => child.data);
366+
367+
expect(posts.length).toBeGreaterThan(0);
368+
expect(posts[0]).toHaveProperty('title');
369+
});
370+
});
371+
372+
describe('Multiple subreddits (one search, one not)', () => {
373+
test('can download from multiple targets: one subreddit and one search query', async () => {
374+
const regularSubreddit = 'pics';
375+
const searchQuery = 'search:subreddit:aww cats AND cute';
376+
377+
// Fetch from regular subreddit
378+
const regularPosts = await fetchRedditPosts(regularSubreddit, 15);
379+
expect(regularPosts.length).toBeGreaterThan(0);
380+
381+
// Fetch from search query
382+
const searchUrl = buildRedditApiUrl({
383+
target: extractName(searchQuery),
384+
isUser: false,
385+
isSearch: true,
386+
sorting: 'new',
387+
time: 'all',
388+
limit: 15,
389+
});
390+
const searchResponse = await axios.get(searchUrl, {
391+
timeout: DEFAULT_REQUEST_TIMEOUT,
392+
headers: { 'User-Agent': 'RedditDownloaderTest/1.0' },
393+
});
394+
const searchPosts = searchResponse.data.data.children.map((child) => child.data);
395+
expect(searchPosts.length).toBeGreaterThan(0);
396+
397+
let downloadedFromRegular = false;
398+
let downloadedFromSearch = false;
399+
400+
// Download at least one file from regular subreddit (e.g. image)
401+
const imageFromRegular = regularPosts.find(
402+
(p) =>
403+
p.url &&
404+
(p.url.endsWith('.jpg') || p.url.endsWith('.png') || p.url.endsWith('.jpeg')),
405+
);
406+
if (imageFromRegular) {
407+
const ext = imageFromRegular.url.split('.').pop().split('?')[0];
408+
const filePath = path.join(TEST_DOWNLOAD_DIR, `multi_subreddit_pics.${ext}`);
409+
await downloadFile(imageFromRegular.url, filePath);
410+
expect(fs.existsSync(filePath)).toBe(true);
411+
expect(fs.statSync(filePath).size).toBeGreaterThan(0);
412+
downloadedFromRegular = true;
413+
}
414+
415+
// Download at least one file from search results (image or text)
416+
const imageFromSearch = searchPosts.find(
417+
(p) =>
418+
p.url &&
419+
(p.url.endsWith('.jpg') || p.url.endsWith('.png') || p.url.endsWith('.jpeg')),
420+
);
421+
if (imageFromSearch) {
422+
const ext = imageFromSearch.url.split('.').pop().split('?')[0];
423+
const filePath = path.join(TEST_DOWNLOAD_DIR, `multi_search_aww.${ext}`);
424+
await downloadFile(imageFromSearch.url, filePath);
425+
expect(fs.existsSync(filePath)).toBe(true);
426+
expect(fs.statSync(filePath).size).toBeGreaterThan(0);
427+
downloadedFromSearch = true;
428+
}
429+
if (!downloadedFromSearch) {
430+
const selfPost = searchPosts.find((p) => getPostType(p) === 0);
431+
if (selfPost) {
432+
const filePath = path.join(TEST_DOWNLOAD_DIR, 'multi_search_aww.txt');
433+
const content = `${selfPost.title}\n\n${selfPost.selftext || ''}`;
434+
fs.writeFileSync(filePath, content);
435+
expect(fs.existsSync(filePath)).toBe(true);
436+
downloadedFromSearch = true;
437+
}
438+
}
439+
440+
expect(downloadedFromRegular).toBe(true);
441+
expect(downloadedFromSearch).toBe(true);
442+
});
443+
});
444+
341445
describe('Gallery Post Detection', () => {
342446
test('can identify gallery posts', async () => {
343447
const posts = await fetchRedditPosts('itookapicture', 20, 'top', 'month');
@@ -362,21 +466,29 @@ describe('Error Handling', () => {
362466
const invalidUrl =
363467
'https://www.reddit.com/r/thisdefinitelydoesnotexist12345/top/.json';
364468

365-
await expect(
366-
axios.get(invalidUrl, {
469+
try {
470+
await axios.get(invalidUrl, {
367471
timeout: DEFAULT_REQUEST_TIMEOUT,
368472
headers: { 'User-Agent': 'RedditDownloaderTest/1.0' },
369-
}),
370-
).rejects.toThrow();
473+
});
474+
expect(true).toBe(false);
475+
} catch (err) {
476+
expect(err).toBeDefined();
477+
expect(err.response?.status === 404 || err.message).toBeTruthy();
478+
}
371479
});
372480

373481
test('handles timeout appropriately', async () => {
374-
await expect(
375-
axios.get('https://www.reddit.com/r/pics/top/.json', {
482+
try {
483+
await axios.get('https://www.reddit.com/r/pics/top/.json', {
376484
timeout: 1,
377485
headers: { 'User-Agent': 'RedditDownloaderTest/1.0' },
378-
}),
379-
).rejects.toThrow();
486+
});
487+
expect(true).toBe(false);
488+
} catch (err) {
489+
expect(err).toBeDefined();
490+
expect(err.code === 'ECONNABORTED' || err.message).toBeTruthy();
491+
}
380492
});
381493
});
382494

lib/api.js

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ const {
99
MAX_POSTS_PER_REQUEST,
1010
DEFAULT_REQUEST_TIMEOUT,
1111
isUserProfile,
12+
isSearchQuery,
1213
extractName,
1314
buildRedditApiUrl,
1415
} = require('./utils');
@@ -69,6 +70,7 @@ function createApi(config, state, log, downloaders) {
6970
*/
7071
async function downloadSubredditPosts(target, lastPostId = '', onComplete) {
7172
const isUser = isUserProfile(target);
73+
const isSearch = isSearchQuery(target);
7274
const name = extractName(target);
7375

7476
// Check if we've downloaded enough posts
@@ -90,6 +92,7 @@ function createApi(config, state, log, downloaders) {
9092
const reqUrl = buildRedditApiUrl({
9193
target: name,
9294
isUser,
95+
isSearch,
9396
sorting: state.sorting,
9497
time: state.time,
9598
limit: postsRemaining,
@@ -110,7 +113,9 @@ function createApi(config, state, log, downloaders) {
110113
throw new Error(
111114
isUser
112115
? 'User not found or has no posts'
113-
: 'Subreddit not found or empty',
116+
: (isSearch
117+
? 'Search query found no results'
118+
: 'Subreddit not found or empty'),
114119
);
115120
}
116121

@@ -126,7 +131,11 @@ function createApi(config, state, log, downloaders) {
126131

127132
if (isUser) {
128133
state.downloadDirectory = `${state.downloadDirectoryBase}/user_${name}`;
129-
} else {
134+
}
135+
else if (isSearch) {
136+
state.downloadDirectory = `${state.downloadDirectoryBase}/search_${name}`;
137+
}
138+
else {
130139
const isOver18 = firstPost.over_18 ? 'nsfw' : 'clean';
131140
state.downloadDirectory = config.separate_clean_nsfw
132141
? `${state.downloadDirectoryBase}/${isOver18}/${firstPost.subreddit}`

lib/state.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ function createState(config) {
7676

7777
subredditList = result.subreddit
7878
.split(',')
79-
.map((s) => s.replace(/\s/g, ''));
79+
.map((s) => (s.startsWith('search:') ? s.trim() : s.replace(/\s/g, '')));
8080
repeatForever = result.repeatForever;
8181
numberOfPosts = result.numberOfPosts === 0 ? ALL_POSTS : result.numberOfPosts;
8282
sorting = result.sorting.replace(/\s/g, '');

lib/utils.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ function getMediaDownloadInfo(post) {
149149
break;
150150
}
151151
}
152+
// reddituploads no longer exists. Grab the preview image instead
153+
if (downloadURL.includes('i.reddituploads.com')) {
154+
// Fix the ampersands in the URL
155+
downloadURL = sourceURL.replaceAll('&amp;', '&');
156+
}
152157
}
153158
}
154159

@@ -178,6 +183,11 @@ function isUserProfile(target) {
178183
);
179184
}
180185

186+
function isSearchQuery(target)
187+
{
188+
return target.startsWith('search:');
189+
}
190+
181191
/**
182192
* Extract the clean name from a subreddit or user string
183193
* @param {string} target - Subreddit or user string
@@ -189,6 +199,10 @@ function extractName(target) {
189199
return target.split('user/').pop();
190200
}
191201
return target.split('u/').pop();
202+
} else if (isSearchQuery(target)) {
203+
target = target.split("search:").pop();
204+
target = target.replaceAll(" ", "+");
205+
return target;
192206
}
193207
return target;
194208
}
@@ -201,13 +215,17 @@ function extractName(target) {
201215
function buildRedditApiUrl({
202216
target,
203217
isUser,
218+
isSearch,
204219
sorting,
205220
time,
206221
limit,
207222
after = '',
208223
}) {
209224
if (isUser) {
210225
return `https://www.reddit.com/user/${target}/submitted/.json?limit=${limit}&after=${after}`;
226+
} else if (isSearch) {
227+
target = target.replaceAll(':', '%3A');
228+
return `https://www.reddit.com/search/.json?q=${target}&type=post&include_over_18=on&sort=${sorting}&limit=${limit}&after=${after}`;
211229
}
212230
return `https://www.reddit.com/r/${target}/${sorting}/.json?sort=${sorting}&t=${time}&limit=${limit}&after=${after}`;
213231
}
@@ -243,6 +261,7 @@ module.exports = {
243261
getPostTypeName,
244262
getMediaDownloadInfo,
245263
isUserProfile,
264+
isSearchQuery,
246265
extractName,
247266
buildRedditApiUrl,
248267
parsePostListFile,

0 commit comments

Comments
 (0)