
Conversation

@ciscogeek

aiohttp.client_exceptions.ClientConnectorCertificateError: Cannot connect to host spa5.scrape.center:443 ssl:True [SSLCertVerificationError: (1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:997)')]
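The error comes from Python's default certificate verification rejecting the site's expired certificate. A minimal sketch of the usual workaround, turning off verification with ssl=False on the connector (the index URL below is used only for illustration):

import asyncio
import aiohttp

async def fetch():
    # ssl=False skips certificate verification, so the expired certificate is accepted.
    connector = aiohttp.TCPConnector(ssl=False)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get('https://spa5.scrape.center/api/book/?limit=18&offset=0') as response:
            return await response.json()

print(asyncio.run(fetch()))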
@bestKeyal

I fixed it by disabling SSL certificate verification on the aiohttp connector (ssl=False); check my code below.

Coder: KSM_YBKX

import asyncio
import aiohttp
import logging
import json
from motor.motor_asyncio import AsyncIOMotorClient

BASE_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s: %(message)s")

PAGE_SIZE = 18
PAGE_NUMBER = 100
CONCURRENCY = 5

semaphore = asyncio.Semaphore(CONCURRENCY)
session = None

MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'books'
MONGO_COLLECTION_NAME = 'books'

client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]

async def scrape_api(url):
    # The semaphore caps the number of requests in flight at CONCURRENCY.
    async with semaphore:
        try:
            logging.info("scraping %s", url)
            async with session.get(url) as response:
                return await response.json()
        except aiohttp.ClientError:
            logging.error("error occurred while scraping %s", url, exc_info=True)

async def save_data(data):
    logging.info(f"saving data {data}")
    if data:
        # Upsert by the book's id so reruns update existing documents instead of duplicating them.
        return await collection.update_one(
            {
                'id': data.get('id')
            },
            {
                "$set": data
            },
            upsert=True)

async def scrape_detail(id):
    url = DETAIL_URL.format(id=id)
    data = await scrape_api(url)
    await save_data(data)

async def scrape_index(page):
    url = BASE_URL.format(offset=PAGE_SIZE * (page - 1))
    return await scrape_api(url)

async def main():
    global session
    # ssl=False disables certificate verification, which works around the
    # expired certificate on spa5.scrape.center.
    conn = aiohttp.TCPConnector(ssl=False)
    session = aiohttp.ClientSession(connector=conn)
    scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
    result = await asyncio.gather(*scrape_index_tasks)
    logging.info("result %s", json.dumps(result, ensure_ascii=False, indent=2))
    ids = []
    for index_data in result:
        if not index_data:
            continue
        for item in index_data.get('results'):
            ids.append(item.get('id'))
    scrape_id_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
    await asyncio.wait(scrape_id_tasks)
    await session.close()

asyncio.get_event_loop().run_until_complete(main())
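If you would rather not disable verification for the whole session, aiohttp also accepts the same ssl argument on individual requests. A minimal sketch of that per-request variant (the URL is shown only as an example):

import asyncio
import aiohttp

async def fetch(url):
    async with aiohttp.ClientSession() as session:
        # ssl=False here turns off certificate verification for this request only.
        async with session.get(url, ssl=False) as response:
            return await response.json()

print(asyncio.run(fetch('https://spa5.scrape.center/api/book/?limit=18&offset=0')))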
