Skip to content

Commit 3a18400

Browse files
committed
fix: pass extra_query_params as a keyword argument so it is not misassigned to a positional parameter
1 parent c5c1e14 commit 3a18400

File tree

4 files changed

+22
-17
lines changed

4 files changed

+22
-17
lines changed

.vscode/settings.json

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
{
2-
"python.linting.pylintEnabled": true,
3-
"python.linting.enabled": true,
4-
"cSpell.words": [
5-
"adlt",
6-
"imap",
7-
"murl",
8-
"posixpath"
9-
]
10-
}
2+
"python.linting.pylintEnabled": true,
3+
"python.linting.enabled": true,
4+
"cSpell.words": [
5+
"adlt",
6+
"filterui",
7+
"imap",
8+
"iusc",
9+
"mbot",
10+
"murl",
11+
"posixpath"
12+
],
13+
"python.formatting.provider": "yapf"
14+
}

bing_images/bing.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def fetch_image_urls(
2626
keywords = query
2727
if len(file_type) > 0:
2828
keywords = query + " " + file_type
29-
urls = crawl_image_urls(keywords, filters, limit, extra_query_params)
29+
urls = crawl_image_urls(keywords, filters, limit, extra_query_params=extra_query_params)
3030
for url in urls:
3131
if isValidURL(url, file_type) and url not in result:
3232
result.append(url)
@@ -57,7 +57,7 @@ def download_images(
5757

5858
# Fetch more image URLs than requested, since some of them may be invalid.
5959
max_number = math.ceil(limit*1.5)
60-
urls = fetch_image_urls(query, max_number, file_type, filters, extra_query_params)
60+
urls = fetch_image_urls(query, max_number, file_type, filters, extra_query_params=extra_query_params)
6161
entries = get_image_entries(urls, image_dir)
6262

6363
print("Downloading images")
@@ -131,4 +131,5 @@ def download_image_with_thread(entry):
131131
output_dir="/Users/catchzeng/Desktop/cat",
132132
pool_size=10,
133133
file_type="png",
134-
force_replace=True)
134+
force_replace=True,
135+
extra_query_params='&first=100')

bing_images/crawler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
BASE_URL = "https://www.bing.com/images/search?"
88

99

10-
def gen_query_url(keywords, filters, extra_query_params =''):
10+
def gen_query_url(keywords, filters, extra_query_params=''):
1111
keywords_str = "&q=" + quote(keywords)
1212
query_url = BASE_URL + keywords_str
1313
if len(filters) > 0:
@@ -44,7 +44,7 @@ def image_url_from_webpage(driver, max_number=10000):
4444
return image_urls
4545

4646

47-
def crawl_image_urls(keywords, filters, max_number=10000, proxy=None, proxy_type="http", extra_query_params =''):
47+
def crawl_image_urls(keywords, filters, max_number=10000, proxy=None, proxy_type="http", extra_query_params=''):
4848
chrome_path = shutil.which("chromedriver")
4949
chrome_path = "./bin/chromedriver" if chrome_path is None else chrome_path
5050
chrome_options = webdriver.ChromeOptions()
@@ -53,7 +53,7 @@ def crawl_image_urls(keywords, filters, max_number=10000, proxy=None, proxy_type
5353
"--proxy-server={}://{}".format(proxy_type, proxy))
5454
driver = webdriver.Chrome(chrome_path, chrome_options=chrome_options)
5555

56-
query_url = gen_query_url(keywords, filters, extra_query_params)
56+
query_url = gen_query_url(keywords, filters, extra_query_params=extra_query_params)
5757
driver.set_window_size(1920, 1080)
5858
driver.get(query_url)
5959
image_urls = image_url_from_webpage(driver, max_number)
@@ -72,6 +72,6 @@ def crawl_image_urls(keywords, filters, max_number=10000, proxy=None, proxy_type
7272

7373
if __name__ == '__main__':
7474
images = crawl_image_urls(
75-
"mbot png", "+filterui:aspect-square", max_number=10)
75+
"cat png", "+filterui:aspect-square", max_number=10)
7676
for i in images:
7777
print(i+"\n")

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = bing_images
3-
version = 0.2.1
3+
version = 0.2.2
44
author = CatchZeng
55
author_email = catchzenghh@gmail.com
66
description = Python library to fetch image urls and download using multithreading from Bing.com.

0 commit comments

Comments (0)