Skip to content

Commit 21ed705

Browse files
all sheets scraped
1 parent 63bdf1f commit 21ed705

File tree

3 files changed

+35
-58
lines changed

3 files changed

+35
-58
lines changed

ScrapeThisSite/Forms, Searching & Pagination.py

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,41 @@
66
try: os.mkdir('Scrapped')
77
except: pass
88

9-
link = 'https://www.scrapethissite.com/pages/forms/?page_num=1&per_page=25'
10-
req = requests.get(link)
11-
soup = bs(req.content, 'html5lib')
12-
13-
data = {}
14-
table = soup.findAll('table', attrs = {'class':'table'})[0]
15-
tr = table.findAll('tr')
16-
17-
for i, j in enumerate(tr):
18-
lst = []
19-
if i == 0:
20-
th = j.findAll('th')
21-
for m in th:
22-
lst.append(m.text.strip())
23-
else:
24-
td = j.findAll('td')
25-
for n in td:
26-
lst.append(n.text.strip())
27-
data.update({i : lst})
28-
29-
df = pd.DataFrame.from_dict(
30-
data,
31-
orient='index'
9+
writer = pd.ExcelWriter(
10+
'Scrapped/Forms, Searching & Pagination.xlsx',
11+
engine='xlsxwriter'
3212
)
3313

34-
writer = pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx', engine='xlsxwriter')
35-
pd.DataFrame(df).to_excel(writer, sheet_name = 'Sheet', index = False, header=False)
36-
writer.save()
14+
for page in range(1, 25):
15+
link = f'https://www.scrapethissite.com/pages/forms/?page_num={page}&per_page=25'
16+
req = requests.get(link)
17+
soup = bs(req.content, 'html5lib')
18+
19+
data = {}
20+
table = soup.findAll('table', attrs = {'class':'table'})[0]
21+
tr = table.findAll('tr')
22+
23+
for i, j in enumerate(tr):
24+
lst = []
25+
if i == 0:
26+
th = j.findAll('th')
27+
for m in th:
28+
lst.append(m.text.strip())
29+
else:
30+
td = j.findAll('td')
31+
for n in td:
32+
lst.append(n.text.strip())
33+
data.update({i : lst})
34+
35+
df = pd.DataFrame.from_dict(
36+
data,
37+
orient='index'
38+
)
39+
40+
pd.DataFrame(df).to_excel(writer,
41+
sheet_name = f'Sheet_{page}',
42+
index = False,
43+
header=False
44+
)
3745

38-
# df.to_csv(
39-
# 'Scrapped/Forms, Searching & Pagination.csv',
40-
# index = False,
41-
# header=False,
42-
# encoding='utf-8'
43-
# )
46+
writer.save()

ScrapeThisSite/Scrapped/Forms, Searching & Pagination.csv

Lines changed: 0 additions & 26 deletions
This file was deleted.
35.2 KB
Binary file not shown.

0 commit comments

Comments
 (0)