|
20 | 20 | from urllib.parse import urlparse
|
21 | 21 | from config import EMAIL, PASSWORD
|
22 | 22 |
|
23 |
| -USE_PREMIUM: bool = False # Set to True if you want to login to Substack and convert paid for posts |
24 |
| -BASE_SUBSTACK_URL: str = "https://www.thefitzwilliam.com/" # Substack you want to convert to markdown |
| 23 | +USE_PREMIUM: bool = True # Set to True if you want to log in to Substack and convert paid posts |
| 24 | +BASE_SUBSTACK_URL: str = "https://premSubstk.io/" # Substack you want to convert to markdown |
25 | 25 | BASE_MD_DIR: str = "substack_md_files" # Name of the directory where we'll save the .md essay files
|
26 | 26 | BASE_HTML_DIR: str = "substack_html_pages" # Name of the directory where we'll save the .html essay files
|
27 | 27 | HTML_TEMPLATE: str = "author_template.html" # HTML template to use for the author page
|
28 | 28 | JSON_DATA_DIR: str = "data"
|
29 |
| -NUM_POSTS_TO_SCRAPE: int = 3 # Set to 0 if you want all posts |
| 29 | +NUM_POSTS_TO_SCRAPE: int = 0 # Set to 0 if you want all posts |
30 | 30 |
|
31 | 31 |
|
32 | 32 | def extract_main_part(url: str) -> str:
|
@@ -381,12 +381,14 @@ def __init__(
|
381 | 381 | if user_agent:
|
382 | 382 | options.add_argument(f'user-agent={user_agent}') # Pass this if running headless and blocked by captcha
|
383 | 383 |
|
384 |
| - if edge_driver_path: |
385 |
| - service = Service(executable_path=edge_driver_path) |
386 |
| - else: |
387 |
| - service = Service(EdgeChromiumDriverManager().install()) |
| 384 | + # if edge_driver_path: |
| 385 | + # service = Service(executable_path=edge_driver_path) |
| 386 | + # else: |
| 387 | + # service = Service(EdgeChromiumDriverManager().install()) |
388 | 388 |
|
389 |
| - self.driver = webdriver.Edge(service=service, options=options) |
| 389 | + os.environ["SE_DRIVER_MIRROR_URL"] = "https://msedgedriver.microsoft.com" |
| 390 | + self.driver = webdriver.Edge() |
| 391 | + # self.driver = webdriver.Edge(service=service, options=options) |
390 | 392 | self.login()
|
391 | 393 |
|
392 | 394 | def login(self) -> None:
|
|
0 commit comments