Skip to content

Commit 576a842

Browse files
committed
chore: Update SteamStoreCleaner to translate non-English categories and genres
1 parent b291912 commit 576a842

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

notebooks/steam_data_cleaning.ipynb

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
"import pandas as pd\n",
2929
"from bs4 import BeautifulSoup\n",
3030
"from db import get_db\n",
31+
"from deep_translator import GoogleTranslator\n",
3132
"from settings import Path\n",
3233
"from sqlalchemy import text\n",
3334
"from utils import check_na, print_steam_links\n",
@@ -1659,6 +1660,16 @@
16591660
" for col in ['categories', 'genres']:\n",
16601661
" df[col] = df[col].apply(lambda x: ';'.join(item['description'] for item in literal_eval(x)))\n",
16611662
" \n",
1663+
" def translate(text):\n",
1664+
" try:\n",
1665+
" lang = GoogleTranslator(source=\"auto\", target=\"en\").translate(text)\n",
1666+
" return lang\n",
1667+
" except Exception as e:\n",
1668+
" return text\n",
1669+
" \n",
1670+
" df.loc[df[\"english\"] == 0, \"genres\"] = df.loc[df[\"english\"] == 0, \"genres\"].apply(translate)\n",
1671+
" df.loc[df[\"english\"] == 0, \"categories\"] = df.loc[df[\"english\"] == 0, \"categories\"].apply(translate)\n",
1672+
" \n",
16621673
" return df\n",
16631674
"\n",
16641675
"cat_gen_df = process_categories_and_genres(price_df)\n",

0 commit comments

Comments
 (0)