 try: os.mkdir('Scrapped')
 except: pass
 
-link = 'https://www.scrapethissite.com/pages/forms/?page_num=1&per_page=25'
-req = requests.get(link)
-soup = bs(req.content, 'html5lib')
-
-data = {}
-table = soup.findAll('table', attrs = {'class':'table'})[0]
-tr = table.findAll('tr')
-
-for i, j in enumerate(tr):
-    lst = []
-    if i == 0:
-        th = j.findAll('th')
-        for m in th:
-            lst.append(m.text.strip())
-    else:
-        td = j.findAll('td')
-        for n in td:
-            lst.append(n.text.strip())
-    data.update({i : lst})
-
-df = pd.DataFrame.from_dict(
-    data,
-    orient='index'
+writer = pd.ExcelWriter(
+    'Scrapped/Forms, Searching & Pagination.xlsx',
+    engine='xlsxwriter'
 )
 
-writer = pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx', engine='xlsxwriter')
-pd.DataFrame(df).to_excel(writer, sheet_name = 'Sheet', index = False, header=False)
-writer.save()
+for page in range(1, 25):
+    link = f'https://www.scrapethissite.com/pages/forms/?page_num={page}&per_page=25'
+    req = requests.get(link)
+    soup = bs(req.content, 'html5lib')
+
+    data = {}
+    table = soup.findAll('table', attrs = {'class':'table'})[0]
+    tr = table.findAll('tr')
+
+    for i, j in enumerate(tr):
+        lst = []
+        if i == 0:
+            th = j.findAll('th')
+            for m in th:
+                lst.append(m.text.strip())
+        else:
+            td = j.findAll('td')
+            for n in td:
+                lst.append(n.text.strip())
+        data.update({i : lst})
+
+    df = pd.DataFrame.from_dict(
+        data,
+        orient='index'
+    )
+
+    pd.DataFrame(df).to_excel(writer,
+        sheet_name = f'Sheet_{page}',
+        index = False,
+        header=False
+    )
 
-# df.to_csv(
-#     'Scrapped/Forms, Searching & Pagination.csv',
-#     index = False,
-#     header=False,
-#     encoding='utf-8'
-# )
+writer.save()
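For reference, a minimal self-contained sketch of the paginated version follows. It assumes requests, beautifulsoup4, and pandas (with an Excel engine such as xlsxwriter or openpyxl) are installed and that every page exposes the same `.table` element; names such as `BASE`, `rows`, and `cells` are illustrative and not taken from the diff. It also closes the writer via a context manager, since newer pandas releases drop `ExcelWriter.save()` in favour of `close()`.

import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

os.makedirs('Scrapped', exist_ok=True)   # avoids the bare try/except around os.mkdir

BASE = 'https://www.scrapethissite.com/pages/forms/?page_num={}&per_page=25'

# One workbook, one sheet per page; the writer is closed automatically on exit.
with pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx') as writer:
    for page in range(1, 25):                      # pages 1..24, as in the diff
        resp = requests.get(BASE.format(page), timeout=30)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, 'html.parser')

        table = soup.find('table', class_='table')
        rows = []
        for tr in table.find_all('tr'):
            # the header row uses <th>, data rows use <td>
            cells = tr.find_all(['th', 'td'])
            rows.append([c.get_text(strip=True) for c in cells])

        # first scraped row becomes the header, the rest become the data
        df = pd.DataFrame(rows[1:], columns=rows[0])
        df.to_excel(writer, sheet_name=f'Sheet_{page}', index=False)

Writing the first row as a real header (rather than `header=False`) keeps the column names with each sheet; otherwise the behaviour mirrors the change above: pages 1 through 24, one sheet per page in a single workbook.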