2 files changed: +9 -12 lines changed
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,11 @@ def get_papers_to_scrape(
1616 stmt = select (ScrapingQueue ).where (
1717 or_ (
1818 ScrapingQueue .scraped_at >= resume_from ,
19- ScrapingQueue .scraping_attempted == False ,
19+ ScrapingQueue .scraping_attempted == False , # noqa: E712
2020 )
2121 )
2222 else :
23- stmt = select (ScrapingQueue ).where (ScrapingQueue .scraping_attempted == False )
23+ stmt = select (ScrapingQueue ).where (ScrapingQueue .scraping_attempted == False ) # noqa: E712
2424
2525 stmt = stmt .limit (limit )
2626 papers_to_scrape = session .exec (stmt ).all ()
@@ -77,10 +77,10 @@ def get_scraping_stats():
7777 stmt = select (ScrapingQueue .id )
7878 total = len (session .exec (stmt ).all ())
7979
80- scraped_stmt = select (ScrapingQueue .id ).where (ScrapingQueue .scraping_successful == True )
80+ scraped_stmt = select (ScrapingQueue .id ).where (ScrapingQueue .scraping_successful == True ) # noqa: E712
8181 scraped = len (session .exec (scraped_stmt ).all ())
8282
83- failed_stmt = select (ScrapingQueue .id ).where (ScrapingQueue .scraping_successful == False )
83+ failed_stmt = select (ScrapingQueue .id ).where (ScrapingQueue .scraping_successful == False ) # noqa: E712
8484 failed = len (session .exec (failed_stmt ).all ())
8585
8686 # Pending = total papers - successfully scraped papers
Original file line number | Diff line number | Diff line change
1- import os
21import re
3- import string
42
5- import numpy as np
6- import pandas as pd
73
8-
9- def remove_page_numbers (pages : list [str ], page_numbers : list [tuple [int | None , str | None ]]) -> list [str ]:
4+ def remove_page_numbers (
5+ pages : list [str ], page_numbers : list [tuple [int | None , str | None ]]
6+ ) -> list [str ]:
107 """Remove page numbers from pages based on identified page_numbers list."""
118 if page_numbers is None :
129 return pages
@@ -110,12 +107,12 @@ def keep_consistent(
110107
111108 # If a dominant 'where' is found, set others to None
112109 if dominant_where is not None :
113- for i , (pn , where ) in enumerate (page_numbers ):
110+ for i , (_ , where ) in enumerate (page_numbers ):
114111 if where != dominant_where :
115112 page_numbers [i ] = (None , None )
116113
117114 last_valid = None
118- for i , (pn , where ) in enumerate (page_numbers ):
115+ for i , (pn , _ ) in enumerate (page_numbers ):
119116 if pn is not None :
120117 if last_valid is None :
121118 last_valid = (i , pn )
You can’t perform that action at this time.
0 commit comments