Commit aaf280f

Simplify proxy handling (remove custom, use HTTP client)
1 parent 8ce2922


scripts/portal-fetcher/openshift-docs-downloader.py

Lines changed: 2 additions & 27 deletions
@@ -893,27 +893,6 @@ async def verify_against_toc(session, html_single_urls, visited_urls, base_url,
     logger.info("TOC verification completed successfully.")
     return True
 
-async def proxy_setup(proxy_url=None):
-    """
-    Set up proxy configuration for requests
-
-    Args:
-        proxy_url (str): Proxy URL if provided
-
-    Returns:
-        dict: Proxy configuration for aiohttp
-    """
-    if not proxy_url:
-        # Check environment variables
-        proxy_url = os.environ.get('HTTP_PROXY') or os.environ.get('http_proxy')
-
-    if proxy_url:
-        logger.info(f"Using proxy: {proxy_url}")
-        return {
-            'proxy': proxy_url
-        }
-    return None
-
 def export_url_mapping(db_path, output_dir):
     """
     Export a mapping of local file paths to their source URLs
@@ -952,7 +931,7 @@ def export_change_report(db_path, output_dir):
 
     return report
 
-async def run_downloader(base_url, output_dir, concurrency=5, force=False, proxy_url=None, skip_toc=False):
+async def run_downloader(base_url, output_dir, concurrency=5, force=False, skip_toc=False):
     """
     Run the complete download process
 
@@ -961,7 +940,6 @@ async def run_downloader(base_url, output_dir, concurrency=5, force=False, proxy
         output_dir (str): Directory where documentation will be saved
         concurrency (int): Number of concurrent downloads
         force (bool): Force download even if files haven't changed
-        proxy_url (str): Proxy URL to use for requests
         skip_toc (bool): Skip TOC verification
 
     Returns:
@@ -986,12 +964,9 @@ async def run_downloader(base_url, output_dir, concurrency=5, force=False, proxy
     # Create semaphore for limiting concurrent requests
     semaphore = asyncio.Semaphore(concurrency)
 
-    # Set up proxy
-    proxy_config = await proxy_setup(proxy_url)
-
     start_time = time.time()
 
-    async with aiohttp.ClientSession() as session:
+    async with aiohttp.ClientSession(trust_env=True) as session:
         # Step 1: Crawl to discover all html-single pages
         visited_urls, html_single_urls = await crawl(session, base_url, base_url, semaphore)
 