From bd47d0a7048a704f949c61edc9a428157a09626e Mon Sep 17 00:00:00 2001
From: timf34
Date: Sat, 10 May 2025 20:38:04 +0100
Subject: [PATCH 1/2] updated date selector

---
 substack_scraper.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/substack_scraper.py b/substack_scraper.py
index a1dc5534..e0c92a32 100644
--- a/substack_scraper.py
+++ b/substack_scraper.py
@@ -257,9 +257,10 @@ def extract_post_data(self, soup: BeautifulSoup) -> Tuple[str, str, str, str, st
         subtitle_element = soup.select_one("h3.subtitle")
         subtitle = subtitle_element.text.strip() if subtitle_element else ""
 
+
         date_element = soup.find(
             "div",
-            class_="pencraft pc-reset _color-pub-secondary-text_1xu16_194 _line-height-20_1xu16_81 _font-meta_1xu16_116 _size-11_1xu16_32 _weight-medium_1xu16_146 _transform-uppercase_1xu16_241 _reset_1xu16_2 _meta_1xu16_441"
+            class_="pencraft pc-reset color-pub-secondary-text-hGQ02T line-height-20-t4M0El font-meta-MWBumP size-11-NuY2Zx weight-medium-fw81nC transform-uppercase-yKDgcq reset-IxiVJZ meta-EgzBVA"
         )
         date = date_element.text.strip() if date_element else "Date not found"
 

From 143fcb8232acdd2a5f8d2af0fd488b18e9aec0ef Mon Sep 17 00:00:00 2001
From: dsouzaankit
Date: Tue, 12 Aug 2025 18:40:29 -0400
Subject: [PATCH 2/2] selenium edge url not found fix

---
 substack_scraper.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/substack_scraper.py b/substack_scraper.py
index e0c92a32..ad891d58 100644
--- a/substack_scraper.py
+++ b/substack_scraper.py
@@ -20,13 +20,13 @@ from urllib.parse import urlparse
 
 from config import EMAIL, PASSWORD
 
-USE_PREMIUM: bool = False  # Set to True if you want to login to Substack and convert paid for posts
-BASE_SUBSTACK_URL: str = "https://www.thefitzwilliam.com/"  # Substack you want to convert to markdown
+USE_PREMIUM: bool = True  # Set to True if you want to login to Substack and convert paid for posts
+BASE_SUBSTACK_URL: str = "https://premSubstk.io/"  # Substack you want to convert to markdown
 BASE_MD_DIR: str = "substack_md_files"  # Name of the directory we'll save the .md essay files
 BASE_HTML_DIR: str = "substack_html_pages"  # Name of the directory we'll save the .html essay files
 HTML_TEMPLATE: str = "author_template.html"  # HTML template to use for the author page
 JSON_DATA_DIR: str = "data"
-NUM_POSTS_TO_SCRAPE: int = 3  # Set to 0 if you want all posts
+NUM_POSTS_TO_SCRAPE: int = 0  # Set to 0 if you want all posts
 
 
 def extract_main_part(url: str) -> str:
@@ -381,12 +381,14 @@ def __init__(
         if user_agent:
             options.add_argument(f'user-agent={user_agent}')  # Pass this if running headless and blocked by captcha
 
-        if edge_driver_path:
-            service = Service(executable_path=edge_driver_path)
-        else:
-            service = Service(EdgeChromiumDriverManager().install())
+        # if edge_driver_path:
+        #     service = Service(executable_path=edge_driver_path)
+        # else:
+        #     service = Service(EdgeChromiumDriverManager().install())
 
-        self.driver = webdriver.Edge(service=service, options=options)
+        os.environ["SE_DRIVER_MIRROR_URL"] = "https://msedgedriver.microsoft.com"
+        self.driver = webdriver.Edge()
+        # self.driver = webdriver.Edge(service=service, options=options)
         self.login()
 
     def login(self) -> None:
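
Note on PATCH 1/2: both the old and the new class strings look like hashed, build-specific
names ("_color-pub-secondary-text_1xu16_194" before, "color-pub-secondary-text-hGQ02T"
after), so this selector is likely to break again the next time Substack rebuilds its
frontend. A less brittle option is to match only the stable fragment of the class name.
The sketch below is illustrative and not part of the patch: it assumes the date container
keeps a class token containing "color-pub-secondary-text", and the helper name
find_date_element is made up for the example.

    from bs4 import BeautifulSoup

    def find_date_element(soup: BeautifulSoup):
        # Substring match on the stable class fragment instead of the full
        # hashed class list from one particular Substack build.
        return soup.select_one('div[class*="color-pub-secondary-text"]')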
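
Note on PATCH 2/2: webdriver.Edge() is now called with no arguments, so the Options object
assembled just above (including the user-agent argument shown in the context lines) and any
explicit edge_driver_path are silently ignored; only the driver-download problem is handled,
via the SE_DRIVER_MIRROR_URL environment variable. A variant that keeps both behaviours is
sketched below. It is a minimal sketch under the assumption that Selenium Manager honours
SE_DRIVER_MIRROR_URL as the patch relies on; build_edge_driver is a hypothetical helper,
not code from the repository.

    import os

    from selenium import webdriver
    from selenium.webdriver.edge.options import Options
    from selenium.webdriver.edge.service import Service

    def build_edge_driver(options: Options, edge_driver_path: str = "") -> webdriver.Edge:
        # Keep the mirror override from the patch so Selenium Manager can
        # fetch msedgedriver from Microsoft's endpoint.
        os.environ.setdefault("SE_DRIVER_MIRROR_URL", "https://msedgedriver.microsoft.com")
        if edge_driver_path:
            # Honour an explicitly provided driver binary, as the old code did.
            return webdriver.Edge(service=Service(executable_path=edge_driver_path), options=options)
        # Otherwise let Selenium Manager resolve the driver, but keep the options.
        return webdriver.Edge(options=options)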