 try: os.mkdir('Scrapped')
 except: pass
 
-link = 'https://www.scrapethissite.com/pages/forms/?page_num=1&per_page=25'
-req = requests.get(link)
-soup = bs(req.content, 'html5lib')
-
-data = {}
-table = soup.findAll('table', attrs = {'class':'table'})[0]
-tr = table.findAll('tr')
-
-for i, j in enumerate(tr):
-    lst = []
-    if i == 0:
-        th = j.findAll('th')
-        for m in th:
-            lst.append(m.text.strip())
-    else:
-        td = j.findAll('td')
-        for n in td:
-            lst.append(n.text.strip())
-    data.update({i : lst})
-
-df = pd.DataFrame.from_dict(
-    data,
-    orient='index'
+writer = pd.ExcelWriter(
+    'Scrapped/Forms, Searching & Pagination.xlsx',
+    engine='xlsxwriter'
 )
 
-writer = pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx', engine='xlsxwriter')
-pd.DataFrame(df).to_excel(writer, sheet_name = 'Sheet', index = False, header=False)
-writer.save()
+for page in range(1, 25):
+    link = f'https://www.scrapethissite.com/pages/forms/?page_num={page}&per_page=25'
+    req = requests.get(link)
+    soup = bs(req.content, 'html5lib')
+
+    data = {}
+    table = soup.findAll('table', attrs = {'class':'table'})[0]
+    tr = table.findAll('tr')
+
+    for i, j in enumerate(tr):
+        lst = []
+        if i == 0:
+            th = j.findAll('th')
+            for m in th:
+                lst.append(m.text.strip())
+        else:
+            td = j.findAll('td')
+            for n in td:
+                lst.append(n.text.strip())
+        data.update({i : lst})
+
+    df = pd.DataFrame.from_dict(
+        data,
+        orient='index'
+    )
+
+    pd.DataFrame(df).to_excel(writer,
+        sheet_name = f'Sheet_{page}',
+        index = False,
+        header=False
+    )
 
-# df.to_csv(
-#     'Scrapped/Forms, Searching & Pagination.csv',
-#     index = False,
-#     header=False,
-#     encoding='utf-8'
-# )
+writer.save()
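A minimal sketch of the same paginated, one-sheet-per-page export, assuming the imports at the top of the file (os, requests, pandas as pd, BeautifulSoup as bs) and that the xlsxwriter and html5lib packages are installed. It wraps the writer in a with block, since recent pandas releases replaced ExcelWriter.save() with close(); the helper names rows and cell below are illustrative and not part of this commit.

import os
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs

os.makedirs('Scrapped', exist_ok=True)  # no bare try/except needed

# The with block saves and closes the workbook when it exits.
with pd.ExcelWriter('Scrapped/Forms, Searching & Pagination.xlsx',
                    engine='xlsxwriter') as writer:
    for page in range(1, 25):
        link = ('https://www.scrapethissite.com/pages/forms/'
                f'?page_num={page}&per_page=25')
        soup = bs(requests.get(link).content, 'html5lib')

        # First row holds the <th> headers, the remaining rows hold <td> cells.
        table = soup.find_all('table', attrs={'class': 'table'})[0]
        rows = [[cell.text.strip()
                 for cell in (tr.find_all('th') or tr.find_all('td'))]
                for tr in table.find_all('tr')]

        # One sheet per results page; the header row is written as plain data,
        # matching the commit's index=False, header=False output.
        pd.DataFrame(rows).to_excel(writer, sheet_name=f'Sheet_{page}',
                                    index=False, header=False)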