Skip to content

Commit 038c025

Browse files
more fetch calls
1 parent a3cd49e commit 038c025

File tree

4 files changed

+47
-0
lines changed

4 files changed

+47
-0
lines changed

ScrapeThisSite/Frames & iFrames.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
import requests, os
3+
from bs4 import BeautifulSoup
4+
import pandas as pd
5+
6+
# Make sure the output directory exists. ``exist_ok=True`` tolerates a
# directory left over from a previous run, while real failures (for example
# a permission error) still surface instead of being silently swallowed.
os.makedirs('Scrapped', exist_ok=True)

# Workbook that receives the scraped turtle data; written with xlsxwriter.
writer = pd.ExcelWriter(
    'Scrapped/Frames & iFrames.xlsx',
    engine='xlsxwriter',
)
13+
14+
def fetch(turtle='', tag='h3', attrs_class='family-name'):
    """Download a turtle frame page and return the tags matching a class.

    Args:
        turtle: Value sent as the ``family`` query parameter. The script
            calls this with the empty default to enumerate family headings.
        tag: Name of the HTML tag to search for.
        attrs_class: Value the tag's ``class`` attribute must match.

    Returns:
        The result of ``soup.find_all``: the matching tags, empty when
        nothing on the page matches.

    Raises:
        requests.HTTPError: If the server responds with an error status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(
        'https://www.scrapethissite.com/pages/frames/',
        # Let requests build the query string so the family name is
        # URL-encoded rather than spliced into the URL verbatim.
        params={'frame': 'i', 'family': turtle},
        # Without a timeout a stalled connection blocks the script forever.
        timeout=30,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html5lib')
    # ``find_all`` is the current name; ``findAll`` is a deprecated alias.
    return soup.find_all(tag, attrs={'class': attrs_class})
22+
23+
# Build one row per turtle family, keyed by its position in the listing:
# [image URL, family name, description].
data = {}
for j, heading in enumerate(fetch()):
    turtle = heading.text.strip()

    # NOTE: each fetch() call below downloads the same family page again;
    # only the tag/class being searched differs. The [0] index raises
    # IndexError if the expected element is missing from the page.
    turtle_image = fetch(turtle, tag='img', attrs_class='turtle-image center-block')[0]
    family_name = fetch(turtle, tag='h3', attrs_class='family-name')[0]
    description = fetch(turtle, tag='p', attrs_class='lead')[0]

    data[j] = [
        turtle_image['src'].strip(),
        family_name.text.strip(),
        description.text.strip(),
    ]

# orient='index' turns every dict entry into one row of the frame.
df = pd.DataFrame.from_dict(
    data,
    orient='index',
)

# Write the rows as-is: no index column and no header row.
df.to_excel(
    writer,
    sheet_name='turtle',
    index=False,
    header=False,
)

# ExcelWriter.save() was removed in pandas 2.0; close() writes the workbook
# to disk and releases the file handle.
writer.close()
7.22 KB
Binary file not shown.
File renamed without changes.

static/BackupWebScrape.zip

53.4 KB
Binary file not shown.

0 commit comments

Comments
 (0)