 try: os.mkdir('Scrapped')
 except: pass
 
-link = 'https://www.scrapethissite.com/pages/forms/?page_num=1&per_page=25'
-req = requests.get(link)
-soup = bs(req.content, 'html5lib')
-
-data = {}
-table = soup.findAll('table', attrs = {'class':'table'})[0]
-tr = table.findAll('tr')
-
-for i, j in enumerate(tr):
-    lst = []
-    if i == 0:
-        th = j.findAll('th')
-        for m in th:
-            lst.append(m.text.strip())
-    else:
-        td = j.findAll('td')
-        for n in td:
-            lst.append(n.text.strip())
-    data.update({i : lst})
-
-df = pd.DataFrame.from_dict(
-    data,
-    orient='index'
+writer = pd.ExcelWriter(
+    'Scrapped/Forms, Searching & Pagination.xlsx',
+    engine='xlsxwriter'
 )
 
-writer = pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx', engine='xlsxwriter')
-pd.DataFrame(df).to_excel(writer, sheet_name = 'Sheet', index = False, header=False)
-writer.save()
+for page in range(1, 25):
+    link = f'https://www.scrapethissite.com/pages/forms/?page_num={page}&per_page=25'
+    req = requests.get(link)
+    soup = bs(req.content, 'html5lib')
+
+    data = {}
+    table = soup.findAll('table', attrs = {'class':'table'})[0]
+    tr = table.findAll('tr')
+
+    for i, j in enumerate(tr):
+        lst = []
+        if i == 0:
+            th = j.findAll('th')
+            for m in th:
+                lst.append(m.text.strip())
+        else:
+            td = j.findAll('td')
+            for n in td:
+                lst.append(n.text.strip())
+        data.update({i : lst})
+
+    df = pd.DataFrame.from_dict(
+        data,
+        orient='index'
+    )
+
+    pd.DataFrame(df).to_excel(writer,
+        sheet_name = f'Sheet_{page}',
+        index = False,
+        header=False
+    )
 
-# df.to_csv(
-#     'Scrapped/Forms, Searching & Pagination.csv',
-#     index = False,
-#     header=False,
-#     encoding='utf-8'
-# )
+writer.save()
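A minimal sketch of the same paginated, one-sheet-per-page export, assuming the imports at the top of the file (os, requests, pandas as pd, BeautifulSoup as bs) and that the xlsxwriter and html5lib packages are installed. It wraps the writer in a with block, since recent pandas releases replaced ExcelWriter.save() with close(); the helper names rows and cell below are illustrative and not part of this commit.

import os
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

os.makedirs('Scrapped', exist_ok=True)  # no bare try/except needed

# The with block saves and closes the workbook when it exits.
with pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx',
                    engine='xlsxwriter') as writer:
    for page in range(1, 25):
        link = ('https://www.scrapethissite.com/pages/forms/'
                f'?page_num={page}&per_page=25')
        soup = bs(requests.get(link).content, 'html5lib')

        # First row holds the <th> headers, the remaining rows hold <td> cells.
        table = soup.find_all('table', attrs={'class': 'table'})[0]
        rows = [[cell.text.strip()
                 for cell in (tr.find_all('th') or tr.find_all('td'))]
                for tr in table.find_all('tr')]

        # One sheet per results page; the header row is written as plain data,
        # matching the commit's index=False, header=False output.
        pd.DataFrame(rows).to_excel(writer, sheet_name=f'Sheet_{page}',
                                    index=False, header=False)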