@@ -197,33 +197,33 @@ def crawl_keyword_cranks(database_connection: PostgresCon) -> None:
197197 language = language ,
198198 keyword = keyword ,
199199 )
200+ df ["keyword_text" ] = keyword
201+ df ["keyword_id" ] = row ["keyword_id" ]
200202 df ["language" ] = language .lower ()
201203 df ["country" ] = country .upper ()
202- df ["crawled_date" ] = datetime .datetime .now (tz = datetime .UTC ).date ()
204+ df ["crawled_at" ] = datetime .datetime .now (tz = datetime .UTC )
205+ df ["crawled_date" ] = df ["crawled_at" ].dt .date
203206 all_keywords = pd .concat ([all_keywords , df ], ignore_index = True )
204207 except Exception :
205208 logger .exception (f"Scrape keyword={ keyword } hit error, skipping" )
206209 raw_keywords_to_s3 (all_keywords )
210+ all_keywords = all_keywords .rename (columns = {"keyword_id" : "keyword" })
211+ key_columns = ["keyword" ]
212+ upsert_df (
213+ table_name = "keywords_crawled_at" ,
214+ schema = "logging" ,
215+ insert_columns = ["keyword" , "crawled_at" ],
216+ df = all_keywords [["keyword" , "crawled_at" ]],
217+ key_columns = key_columns ,
218+ database_connection = database_connection ,
219+ )
207220
208221
209222# def import_keywords_from_s3(database_connection: PostgresCon) -> None:
210223# languages_map = query_languages(database_connection)
211224# language_dict = languages_map.set_index("language_slug")["id"].to_dict()
212225# language_key = language_dict[language]
213- # key_columns = ["keyword"]
214- # upsert_df(
215- # table_name="keywords_crawled_at",
216- # schema="logging",
217- # insert_columns=["keyword", "crawled_at"],
218- # df=pd.DataFrame(
219- # {
220- # "keyword": [keyword_id],
221- # "crawled_at": datetime.datetime.now(tz=datetime.UTC),
222- # }
223- # ),
224- # key_columns=key_columns,
225- # database_connection=database_connection,
226- # )
226+ #
227227
228228
229229def scrape_store_ranks (database_connection : PostgresCon , store : int ) -> None :
0 commit comments