Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions substack_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
from urllib.parse import urlparse
from config import EMAIL, PASSWORD

# Module-level scraper settings.
# NOTE(review): this span reads like a rendered diff hunk, so USE_PREMIUM,
# BASE_SUBSTACK_URL and NUM_POSTS_TO_SCRAPE each appear twice (old value, then
# new value). If executed as written, the later assignment of each name wins.
USE_PREMIUM: bool = False # Set to True to log in to Substack and convert paid-for posts
BASE_SUBSTACK_URL: str = "https://www.thefitzwilliam.com/" # Base URL of the Substack to convert to Markdown
USE_PREMIUM: bool = True # Set to True to log in to Substack and convert paid-for posts
BASE_SUBSTACK_URL: str = "https://premSubstk.io/" # Base URL of the Substack to convert to Markdown
BASE_MD_DIR: str = "substack_md_files" # Name of the directory where the .md essay files are saved
BASE_HTML_DIR: str = "substack_html_pages" # Name of the directory where the .html essay files are saved
HTML_TEMPLATE: str = "author_template.html" # HTML template to use for the author page
JSON_DATA_DIR: str = "data" # presumably the directory for JSON data files; not used in the visible code, confirm against usage
NUM_POSTS_TO_SCRAPE: int = 3 # Number of posts to scrape; set to 0 for all posts
NUM_POSTS_TO_SCRAPE: int = 0 # Number of posts to scrape; set to 0 for all posts


def extract_main_part(url: str) -> str:
Expand Down Expand Up @@ -381,12 +381,14 @@ def __init__(
if user_agent:
options.add_argument(f'user-agent={user_agent}') # Pass this if running headless and blocked by captcha

if edge_driver_path:
service = Service(executable_path=edge_driver_path)
else:
service = Service(EdgeChromiumDriverManager().install())
# if edge_driver_path:
# service = Service(executable_path=edge_driver_path)
# else:
# service = Service(EdgeChromiumDriverManager().install())

self.driver = webdriver.Edge(service=service, options=options)
os.environ["SE_DRIVER_MIRROR_URL"] = "https://msedgedriver.microsoft.com"
self.driver = webdriver.Edge()
# self.driver = webdriver.Edge(service=service, options=options)
self.login()

def login(self) -> None:
Expand Down