|
1 | 1 | import datetime as dt |
2 | | -from collections.abc import Iterator |
3 | 2 |
|
4 | | -from sqlalchemy import MetaData |
| 3 | +from sqlalchemy import MetaData, insert |
5 | 4 | from sqlalchemy.orm import Session |
6 | 5 |
|
7 | 6 | from aross_stations_db.db.tables import Base, Event, Station |
@@ -65,22 +64,33 @@ def load_stations(stations: list[dict[str, str]], *, session: Session) -> None: |
65 | 64 | session.commit() |
66 | 65 |
|
67 | 66 |
|
68 | | -def load_events(events: Iterator[dict[str, str]], *, session: Session) -> None: |
69 | | - session.add_all( |
70 | | - [ |
71 | | - Event( |
72 | | - station_id=event["station_id"], |
73 | | - time_start=dt.datetime.fromisoformat(event["start"]), |
74 | | - time_end=dt.datetime.fromisoformat(event["end"]), |
75 | | - snow_on_ground=_snow_on_ground_status(event["sog"]), |
76 | | - rain_hours=int(event["RA"]), |
77 | | - freezing_rain_hours=int(event["FZRA"]), |
78 | | - solid_precipitation_hours=int(event["SOLID"]), |
79 | | - unknown_precipitation_hours=int(event["UP"]), |
80 | | - ) |
81 | | - for event in events |
82 | | - ] |
def generate_event_object(raw_event: dict[str, str]) -> Event:
    """Build an ``Event`` ORM object from one raw string-keyed record.

    Timestamps are parsed from ISO-8601 strings, hour counters are cast to
    ``int``, and the snow-on-ground flag is decoded via
    ``_snow_on_ground_status``.
    """
    # Assemble the column values first, then construct the ORM object once.
    column_values = {
        "station_id": raw_event["station_id"],
        "time_start": dt.datetime.fromisoformat(raw_event["start"]),
        "time_end": dt.datetime.fromisoformat(raw_event["end"]),
        "snow_on_ground": _snow_on_ground_status(raw_event["sog"]),
        "rain_hours": int(raw_event["RA"]),
        "freezing_rain_hours": int(raw_event["FZRA"]),
        "solid_precipitation_hours": int(raw_event["SOLID"]),
        "unknown_precipitation_hours": int(raw_event["UP"]),
    }
    return Event(**column_values)
| 79 | + |
def load_events(events: list[Event], *, session: Session) -> None:
    """Bulk-load events into the database and commit.

    Uses the SQLAlchemy 2.0 bulk-INSERT path: a single
    ``session.execute(insert(Event), [dict, ...])`` call with a list of
    parameter dictionaries, which batches rows far more efficiently than
    adding ORM instances one by one.

    NOTE: an ORM instance's ``__dict__`` also contains the
    ``_sa_instance_state`` bookkeeping entry, which is not a mapped column.
    It must be stripped before handing the dicts to ``insert()``, otherwise
    it is passed along as a bogus bind parameter. (If a progress bar appears
    to "stall" here, that is the list of parameter dicts being fully
    materialized before the single bulk execute runs — expected behavior.)
    """
    rows = [
        {
            key: value
            for key, value in event.__dict__.items()
            if key != "_sa_instance_state"  # drop ORM bookkeeping, keep columns
        }
        for event in events
    ]
    session.execute(insert(Event), rows)
    session.commit()
85 | 95 |
|
86 | 96 |
|
|
0 commit comments