Skip to content

Commit 036cd35

Browse files
committed
added search
1 parent d437d64 commit 036cd35

File tree

5 files changed

+61
-28
lines changed

5 files changed

+61
-28
lines changed

pysmartprice/price_parser.py renamed to pysmartprice/abstract.py

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,16 @@
22
from bs4 import BeautifulSoup
33

44
from pysmartprice.results import SmartPriceResult
5-
from pysmartprice.helpers import scrape
5+
from pysmartprice.helpers import scrape, scrape_helper
66
from pysmartprice import constants
77

88

9-
class PriceListParser(object):
9+
class BaseParser(object):
1010
def __init__(self, mapper, **kwargs):
1111
self.mapper = mapper
12+
self.params = kwargs
1213
self.url = constants.URL_MAPPER[self.mapper]
13-
self.response = scrape(self._make_url(self.url))
14+
self.response = scrape(self._make_url(self.url), **kwargs)
1415
self.soup = BeautifulSoup(self.response, 'lxml')
1516
self.result = [
1617
SmartPriceResult(self.get_product_attrs(item))
@@ -21,17 +22,14 @@ def _make_url(self, target):
2122
return '{}{}'.format(constants.SMARTPRICE_WEB_URL, target)
2223

2324
@property
24-
def get_page_range(self):
25-
page_range = self.soup.findAll(
26-
'span', attrs={'class': 'pgntn__rslt-page'})
25+
def price_results(self):
26+
if self.get_page_range:
27+
return self.process_multiple_pages()
2728

28-
if not page_range:
29-
return None
29+
return self.result
3030

31-
first_page = int(page_range[0].text)
32-
last_page = int(page_range[1].text)
33-
return first_page, last_page
3431

32+
class ParserMixin(object):
3533
def get_product_attrs(self, item):
3634
return dict(
3735
img=item.find('img').get('src'),
@@ -48,12 +46,6 @@ def products_html(self):
4846
html = self.soup.findAll('div', attrs={'class': 'prdct-item'})
4947
return html
5048

51-
@property
52-
def get_paged_url(self):
53-
i = self.url.find(self.mapper)
54-
paged_url = '{}pages/{}'.format(self.url[:i], self.url[i:])
55-
return paged_url
56-
5749
def process_multiple_pages(self):
5850
results = self.result
5951
first_page, last_page = self.get_page_range
@@ -62,12 +54,15 @@ def process_multiple_pages(self):
6254

6355
for page in range(first_page+1, last_page+1):
6456
url = paged_url.replace('.html', '-{}.html'.format(page))
65-
page_urls.append(self._make_url(url))
57+
params = self.params.copy()
58+
if self.params.get('page', None):
59+
params.update({'page': page})
60+
page_urls.append((self._make_url(url), params))
6661

6762
# Scrape pages in parallel
6863
pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()*2)
6964

70-
for page in pool.map(scrape, page_urls):
65+
for page in pool.map(scrape_helper, page_urls):
7166
self.soup = BeautifulSoup(page, 'lxml')
7267

7368
results += [
@@ -77,8 +72,13 @@ def process_multiple_pages(self):
7772
return results
7873

7974
@property
80-
def price_results(self):
81-
if self.get_page_range:
82-
return self.process_multiple_pages()
75+
def get_page_range(self):
76+
page_range = self.soup.findAll(
77+
'span', attrs={'class': 'pgntn__rslt-page'})
8378

84-
return self.result
79+
if not page_range:
80+
return None
81+
82+
first_page = int(page_range[0].text)
83+
last_page = int(page_range[1].text)
84+
return first_page, last_page

pysmartprice/base.py

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
1-
from pysmartprice.price_parser import PriceListParser
1+
from pysmartprice.smartparser import(
2+
PriceListParser,
3+
SearchParser
4+
)
25
from pysmartprice.constants import SMARTPRICE_ATTRS
36

47

58
class SmartPrice(object):
69

7-
def parser_results(self, product):
8-
parser = PriceListParser(product)
10+
def parser_results(self, product, **kwargs):
11+
parser = PriceListParser(product, **kwargs)
912
return parser.price_results
1013

1114
def __getattr__(self, attr):
@@ -15,3 +18,8 @@ def __getattr__(self, attr):
1518

1619
setattr(self, attr, self.parser_results(SMARTPRICE_ATTRS[attr]))
1720
return getattr(self, attr)
21+
22+
def search(self, search_key):
23+
params = dict(s=search_key, page=1)
24+
parser = SearchParser('search', **params)
25+
return parser.price_results

pysmartprice/constants.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,10 @@
3232
'samsung-tv': 'electronics/pricelist/samsung-tv-price-list-in-india.html',
3333
'sony-tv': 'electronics/pricelist/sony-tv-price-list-in-india.html',
3434
'lg-tv': 'electronics/pricelist/lg-tv-price-list-in-india.html',
35+
'panasonic-tv': 'electronics/pricelist/panasonic-tv-price-list-in-india.html',
36+
'sharp-tv': 'electronics/pricelist/sharp-tv-price-list-in-india.html',
3537

38+
'search': 'msp/search/msp_search_new.php',
3639
}
3740

3841

pysmartprice/helpers.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
11
import requests
22

33

4-
def scrape(url):
5-
resp = requests.get(url)
4+
def scrape(url, **kwargs):
5+
resp = requests.get(url, params=kwargs)
66
return resp.text
7+
8+
9+
def scrape_helper(args):
10+
return scrape(args[0], **args[1])

pysmartprice/smartparser.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
from pysmartprice.abstract import(
2+
BaseParser,
3+
ParserMixin
4+
)
5+
6+
7+
class PriceListParser(BaseParser, ParserMixin):
8+
@property
9+
def get_paged_url(self):
10+
i = self.url.find(self.mapper)
11+
paged_url = '{}pages/{}'.format(self.url[:i], self.url[i:])
12+
return paged_url
13+
14+
15+
class SearchParser(BaseParser, ParserMixin):
16+
@property
17+
def get_paged_url(self):
18+
return self.url

0 commit comments

Comments
 (0)