Skip to content

Commit 143fcb8

Browse files
committed
Fix Selenium Edge driver URL-not-found error
1 parent bd47d0a commit 143fcb8

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

substack_scraper.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,13 +20,13 @@
2020
from urllib.parse import urlparse
2121
from config import EMAIL, PASSWORD
2222

23-
USE_PREMIUM: bool = False # Set to True if you want to login to Substack and convert paid for posts
24-
BASE_SUBSTACK_URL: str = "https://www.thefitzwilliam.com/" # Substack you want to convert to markdown
23+
USE_PREMIUM: bool = True # Set to True if you want to login to Substack and convert paid for posts
24+
BASE_SUBSTACK_URL: str = "https://premSubstk.io/" # Substack you want to convert to markdown
2525
BASE_MD_DIR: str = "substack_md_files" # Name of the directory we'll save the .md essay files
2626
BASE_HTML_DIR: str = "substack_html_pages" # Name of the directory we'll save the .html essay files
2727
HTML_TEMPLATE: str = "author_template.html" # HTML template to use for the author page
2828
JSON_DATA_DIR: str = "data"
29-
NUM_POSTS_TO_SCRAPE: int = 3 # Set to 0 if you want all posts
29+
NUM_POSTS_TO_SCRAPE: int = 0 # Set to 0 if you want all posts
3030

3131

3232
def extract_main_part(url: str) -> str:
@@ -381,12 +381,14 @@ def __init__(
381381
if user_agent:
382382
options.add_argument(f'user-agent={user_agent}') # Pass this if running headless and blocked by captcha
383383

384-
if edge_driver_path:
385-
service = Service(executable_path=edge_driver_path)
386-
else:
387-
service = Service(EdgeChromiumDriverManager().install())
384+
# if edge_driver_path:
385+
# service = Service(executable_path=edge_driver_path)
386+
# else:
387+
# service = Service(EdgeChromiumDriverManager().install())
388388

389-
self.driver = webdriver.Edge(service=service, options=options)
389+
os.environ["SE_DRIVER_MIRROR_URL"] = "https://msedgedriver.microsoft.com"
390+
self.driver = webdriver.Edge()
391+
# self.driver = webdriver.Edge(service=service, options=options)
390392
self.login()
391393

392394
def login(self) -> None:

0 commit comments

Comments
 (0)