|
| 1 | +# type: ignore[reportMissingImports] |
| 2 | + |
import json
import logging
import os
from datetime import datetime, timedelta, timezone
from typing import Any, Awaitable, Callable, Literal
from urllib.parse import urljoin

import httpx
from selectolax.parser import HTMLParser
from sqlalchemy import DateTime, String, select
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
| 15 | + |
# Log under the "uvicorn" logger name (presumably to reuse the server's
# handlers -- confirm against the deployment's logging configuration).
logger = logging.getLogger("uvicorn")

# Page that is scraped; can be overridden with the APOD_SITE_URL env variable.
APOD_HTML = os.getenv("APOD_SITE_URL", "http://www.star.ucl.ac.uk/~apod/apod/")
# Database URL of the cache store (SQLite file accessed through aiosqlite).
DB_URL = "sqlite+aiosqlite:///cache.db"
# Lifetime of a cached scrape before the page is fetched again.
CACHE_EXPIRY = timedelta(hours=12)

# The only request path that is served (compared after stripping slashes).
APP_ROUTE = "api/v1/apod"

# ASGI typing helpers. Scopes and messages carry more than strings (status
# codes, byte bodies, header lists), so their values are typed as Any.
Scope = dict[str, Any]
Receive = Callable[[], Awaitable[dict[str, Any]]]
Send = Callable[[dict[str, Any]], Awaitable[None]]

# Shape of the scraped result returned by get_apod_data().
ApodData = dict[Literal["img", "description", "title"], str]
| 29 | + |
| 30 | + |
class CacheDbBase(DeclarativeBase):
    """Declarative base class whose metadata collects the cache table."""
| 33 | + |
| 34 | + |
class Cache(CacheDbBase):
    """ORM row holding one scraped APOD result together with its expiry."""

    __tablename__ = "cache"

    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Moment at which the data was scraped.
    date: Mapped[datetime] = mapped_column(DateTime, nullable=False)
    # Absolute URL of the picture.
    img: Mapped[str] = mapped_column(String(125), nullable=True)
    # Whitespace-normalised explanation text.
    description: Mapped[str] = mapped_column(String(500), nullable=True)
    # Whitespace-normalised picture title.
    title: Mapped[str] = mapped_column(String(200), nullable=True)
    # Moment after which the row has to be refreshed.
    expire_at: Mapped[datetime] = mapped_column(DateTime, nullable=False)
| 44 | + |
| 45 | + |
def _squash_whitespace(text: str) -> str:
    """Collapse every run of whitespace in *text* into a single space."""
    return " ".join(text.split())


async def get_apod_data() -> ApodData:
    """Scrape the picture URL, title and description from the APOD page.

    Parsing is template based and therefore fragile: the picture is the first
    ``<img>`` of the page, the title is the first ``<b>`` inside the second
    ``<center>`` element, and the description is the first ``<p>`` sibling
    that follows that ``<center>``. Fields that cannot be located stay empty.

    Returns:
        A dict with the keys ``"img"``, ``"description"`` and ``"title"``.

    Raises:
        httpx.HTTPError: if the request fails or the site answers with a
            non-success status code.
    """
    # Follow redirects so a moved page is scraped instead of an empty
    # redirect body.
    async with httpx.AsyncClient(follow_redirects=True) as client:
        resp = await client.get(APOD_HTML)
    # Fail loudly on error pages; otherwise blank data would be returned and
    # kept by callers that cache the result.
    resp.raise_for_status()

    out_dict: ApodData = {"img": "", "description": "", "title": ""}
    tree = HTMLParser(resp.text)
    # Relative links have to be resolved against the URL actually served.
    page_url = str(resp.url)

    first_img = tree.css_first("img")
    if first_img is not None:
        src = first_img.attributes.get("src")
        # Skip a missing/empty src: joining it would yield the page URL.
        if src:
            out_dict["img"] = urljoin(page_url, src)

    centers = tree.css("center")
    if len(centers) < 2:
        return out_dict

    second_center = centers[1]
    title_part = second_center.css_first("b")
    if title_part is not None:
        out_dict["title"] = _squash_whitespace(title_part.text())

    # Walk the following siblings until the first paragraph is found.
    node = second_center.next
    while node is not None:
        if node.tag == "p":
            out_dict["description"] = _squash_whitespace(node.text())
            break
        node = node.next

    return out_dict
| 82 | + |
| 83 | + |
| 84 | +class App: |
| 85 | + def __init__(self): |
| 86 | + self.db_engine = create_async_engine(DB_URL, echo=False) |
| 87 | + self.session = async_sessionmaker(self.db_engine, expire_on_commit=False) |
| 88 | + |
| 89 | + async def get_response(self) -> dict[str, str]: |
| 90 | + out_data = { |
| 91 | + "img": "", |
| 92 | + "description": "", |
| 93 | + "title": "", |
| 94 | + "date": "", |
| 95 | + } |
| 96 | + async with self.session() as session: |
| 97 | + result = await session.execute(select(Cache).limit(1)) |
| 98 | + dat = result.scalar_one_or_none() |
| 99 | + if dat is not None: |
| 100 | + if dat.expire_at < datetime.now(): |
| 101 | + resp = await get_apod_data() |
| 102 | + _time = datetime.now(tz=timezone.utc) |
| 103 | + dat.img = resp["img"] |
| 104 | + dat.description = resp["description"] |
| 105 | + dat.title = resp["title"] |
| 106 | + dat.date = _time |
| 107 | + dat.expire_at = _time + CACHE_EXPIRY |
| 108 | + await session.commit() |
| 109 | + else: |
| 110 | + resp = await get_apod_data() |
| 111 | + _time = datetime.now(tz=timezone.utc) |
| 112 | + dat = Cache( |
| 113 | + img=resp["img"], |
| 114 | + title=resp["title"], |
| 115 | + description=resp["description"], |
| 116 | + date=_time, |
| 117 | + expire_at=_time + CACHE_EXPIRY, |
| 118 | + ) |
| 119 | + session.add(dat) |
| 120 | + await session.commit() |
| 121 | + out_data["img"] = dat.img |
| 122 | + out_data["description"] = dat.description |
| 123 | + out_data["title"] = dat.title |
| 124 | + out_data["date"] = dat.date.strftime("%Y-%m-%d") |
| 125 | + |
| 126 | + return out_data |
| 127 | + |
| 128 | + async def lifespan_handle(self, scope: Scope, receive: Receive, send: Send): |
| 129 | + while True: |
| 130 | + msg = await receive() |
| 131 | + match msg["type"]: |
| 132 | + case "lifespan.startup": |
| 133 | + await self.on_startup() |
| 134 | + await send({"type": "lifespan.startup.complete"}) |
| 135 | + case "lifespan.shutdown": |
| 136 | + await self.on_shutdown() |
| 137 | + await send({"type": "lifespan.shutdown.complete"}) |
| 138 | + case _: |
| 139 | + raise ValueError(f"unknown asgi msg type: {msg['type']}!") |
| 140 | + |
| 141 | + async def on_startup(self): |
| 142 | + async with self.db_engine.begin() as conn: |
| 143 | + await conn.run_sync(CacheDbBase.metadata.drop_all) |
| 144 | + await conn.run_sync(CacheDbBase.metadata.create_all) |
| 145 | + logger.info("[lifetime] Startup Completed!") |
| 146 | + |
| 147 | + async def on_shutdown(self): |
| 148 | + await self.db_engine.dispose() |
| 149 | + logger.info("[lifetime] Shutdown Completed!") |
| 150 | + |
| 151 | + async def serve_req(self, scope: Scope, receive: Receive, send: Send): |
| 152 | + if scope["method"] not in ["GET"]: |
| 153 | + return await self._req_error(405, "Method Not Allowed.", send) |
| 154 | + if scope["path"].strip("/") != APP_ROUTE: |
| 155 | + return await self._req_error(404, "Not Found.", send) |
| 156 | + try: |
| 157 | + resp = await self.get_response() |
| 158 | + await self._req_error(200, json.dumps(resp), send, "application/json") |
| 159 | + except Exception as exp: |
| 160 | + logger.error(f"GET Error: {exp}") |
| 161 | + logger.exception(exp) |
| 162 | + await self._req_error(500, "Internal Server Error", send) |
| 163 | + |
| 164 | + async def __call__(self, scope: Scope, receive: Receive, send: Send): |
| 165 | + match scope["type"]: |
| 166 | + case "lifespan": |
| 167 | + await self.lifespan_handle(scope, receive, send) |
| 168 | + case "http": |
| 169 | + await self.serve_req(scope, receive, send) |
| 170 | + case _: |
| 171 | + await self._req_error(400, "Bad Request.", send) |
| 172 | + |
| 173 | + async def _req_error( |
| 174 | + self, status: int, msg: str, send: Send, content_type: str = "text/plain" |
| 175 | + ): |
| 176 | + await send( |
| 177 | + { |
| 178 | + "type": "http.response.start", |
| 179 | + "status": status, |
| 180 | + "headers": [ |
| 181 | + (b"Content-Type", content_type.encode("utf-8")), |
| 182 | + (b"x-server", b"apod-api"), |
| 183 | + ], |
| 184 | + } |
| 185 | + ) |
| 186 | + await send( |
| 187 | + { |
| 188 | + "type": "http.response.body", |
| 189 | + "body": msg.encode("utf-8"), |
| 190 | + } |
| 191 | + ) |
| 192 | + |
| 193 | + |
# Module-level application instance; its __call__ takes (scope, receive, send).
app = App()
0 commit comments