|
44 | 44 | get_crawl_scenario_countries, |
45 | 45 | get_store_app_columns, |
46 | 46 | prepare_for_psycopg, |
| 47 | + query_all_developers, |
47 | 48 | query_all_domains, |
48 | 49 | query_categories, |
49 | 50 | query_collections, |
@@ -514,6 +515,32 @@ def crawl_developers_for_new_store_ids( |
514 | 515 | logger.exception(f"{row_info=} failed!") |
515 | 516 |
|
516 | 517 |
|
def check_and_insert_developers(
    developers_df: pd.DataFrame,
    apps_df: pd.DataFrame,
    database_connection: PostgresCon,
) -> pd.DataFrame:
    """Insert developers missing from ``developers_df`` and return the updated frame.

    A developer is identified by the ``(store, developer_id)`` pair, which is
    also the conflict key passed to ``upsert_df``. Rows of ``apps_df`` whose
    ``developer_id`` is null are ignored.

    Args:
        developers_df: Developers already known. Must contain ``store`` and
            ``developer_id`` columns.
        apps_df: Apps to check. Must contain ``store``, ``developer_id`` and
            ``developer_name`` columns.
        database_connection: Connection handed through to ``upsert_df``.

    Returns:
        ``developers_df`` itself when nothing is missing; otherwise the rows
        returned by ``upsert_df`` concatenated in front of ``developers_df``.
    """
    key_columns = ["store", "developer_id"]

    # Compare on the full key: the same developer_id under a different store
    # is a different developer and still has to be inserted.
    app_keys = pd.MultiIndex.from_frame(apps_df[key_columns])
    known_keys = pd.MultiIndex.from_frame(developers_df[key_columns])
    has_developer_id = apps_df["developer_id"].notna().to_numpy()
    missing_devs = apps_df[~app_keys.isin(known_keys) & has_developer_id]

    if missing_devs.empty:
        return developers_df

    # Keep one row per key (first name seen wins). Several apps may spell the
    # same developer's name differently, and duplicate keys in one batch would
    # presumably conflict inside the upsert -- NOTE(review): confirm against
    # upsert_df.
    new_devs = (
        missing_devs[[*key_columns, "developer_name"]]
        .drop_duplicates(subset=key_columns)
        .rename(columns={"developer_name": "name"})
    )
    inserted_devs = upsert_df(
        table_name="developers",
        df=new_devs,
        insert_columns=[*key_columns, "name"],
        key_columns=key_columns,
        database_connection=database_connection,
        return_rows=True,
    )
    return pd.concat([inserted_devs, developers_df])
| 542 | + |
| 543 | + |
517 | 544 | def check_and_insert_domains( |
518 | 545 | domains_df: pd.DataFrame, |
519 | 546 | app_urls: pd.DataFrame, |
@@ -671,39 +698,22 @@ def save_developer_info( |
671 | 698 | apps_df: pd.DataFrame, |
672 | 699 | database_connection: PostgresCon, |
673 | 700 | ) -> pd.DataFrame: |
674 | | - assert apps_df["developer_id"].to_numpy()[0], ( |
675 | | - f"{apps_df['store_id']} Missing Developer ID" |
| 701 | + all_developers_df = query_all_developers(database_connection=database_connection) |
| 702 | + all_developers_df = check_and_insert_developers( |
| 703 | + developers_df=all_developers_df, |
| 704 | + apps_df=apps_df, |
| 705 | + database_connection=database_connection, |
676 | 706 | ) |
677 | | - df = ( |
678 | | - apps_df[["store", "developer_id", "developer_name"]] |
679 | | - .rename(columns={"developer_name": "name"}) |
680 | | - .drop_duplicates() |
| 707 | + apps_df = pd.merge( |
| 708 | + apps_df, |
| 709 | + all_developers_df.rename(columns={"id": "developer"})[ |
| 710 | + ["store", "developer_id", "developer"] |
| 711 | + ], |
| 712 | + how="left", |
| 713 | + left_on=["store", "developer_id"], |
| 714 | + right_on=["store", "developer_id"], |
| 715 | + validate="m:1", |
681 | 716 | ) |
682 | | - table_name = "developers" |
683 | | - insert_columns = ["store", "developer_id", "name"] |
684 | | - key_columns = ["store", "developer_id"] |
685 | | - |
686 | | - try: |
687 | | - dev_df = upsert_df( |
688 | | - table_name=table_name, |
689 | | - df=df, |
690 | | - insert_columns=insert_columns, |
691 | | - key_columns=key_columns, |
692 | | - database_connection=database_connection, |
693 | | - return_rows=True, |
694 | | - ) |
695 | | - apps_df = pd.merge( |
696 | | - apps_df, |
697 | | - dev_df.rename(columns={"id": "developer"})[ |
698 | | - ["store", "developer_id", "developer"] |
699 | | - ], |
700 | | - how="left", |
701 | | - left_on=["store", "developer_id"], |
702 | | - right_on=["store", "developer_id"], |
703 | | - validate="m:1", |
704 | | - ) |
705 | | - except Exception as error: |
706 | | - logger.error(f"Developer insert failed with error {error}") |
707 | 717 | return apps_df |
708 | 718 |
|
709 | 719 |
|
|
0 commit comments