Skip to content

Commit 109f290

Browse files
committed
testing translate function
1 parent 9091236 commit 109f290

File tree

1 file changed

+50
-13
lines changed

1 file changed

+50
-13
lines changed

notebooks/data_exploration.ipynb

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,9 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 11,
5+
"execution_count": 1,
66
"metadata": {},
7-
"outputs": [
8-
{
9-
"name": "stdout",
10-
"output_type": "stream",
11-
"text": [
12-
"c:\\Users\\rohit\\OneDrive\\Desktop\\Langchain\\DS-Project\\steam_sales\n"
13-
]
14-
}
15-
],
7+
"outputs": [],
168
"source": [
179
"import os\n",
1810
"import sys\n",
@@ -24,7 +16,7 @@
2416
},
2517
{
2618
"cell_type": "code",
27-
"execution_count": 17,
19+
"execution_count": 2,
2820
"metadata": {},
2921
"outputs": [],
3022
"source": [
@@ -57,7 +49,7 @@
5749
},
5850
{
5951
"cell_type": "code",
60-
"execution_count": 18,
52+
"execution_count": 3,
6153
"metadata": {},
6254
"outputs": [
6355
{
@@ -342,7 +334,7 @@
342334
"4 {\"FPS\": 919, \"Gore\": 58, \"Co-op\": 43, \"Retro\":... "
343335
]
344336
},
345-
"execution_count": 18,
337+
"execution_count": 3,
346338
"metadata": {},
347339
"output_type": "execute_result"
348340
}
@@ -363,6 +355,51 @@
363355
"game_data.head()"
364356
]
365357
},
358+
{
359+
"cell_type": "code",
360+
"execution_count": 27,
361+
"metadata": {},
362+
"outputs": [],
363+
"source": [
364+
"def is_english(text):\n",
365+
" try:\n",
366+
" # Detect language\n",
367+
" lang = GoogleTranslator(source='auto', target='en').translate(text)\n",
368+
" return lang\n",
369+
" except Exception as e:\n",
370+
" return text\n",
371+
" \n",
372+
"non_english_df = game_data[game_data['english'] == 0]\n",
373+
"non_english_df['genres'] = non_english_df['genres'].apply(lambda x: is_english(str(x)))\n",
374+
"\n",
375+
"# english_count = non_english_df['count_english'].sum()\n",
376+
"# non_english_count = (~non_english_df['count_english']).sum()\n",
377+
"\n",
378+
"# print(f'English Strings: {english_count}')\n",
379+
"# print(f'Non-English Strings: {non_english_count}')"
380+
]
381+
},
382+
{
383+
"cell_type": "code",
384+
"execution_count": 36,
385+
"metadata": {},
386+
"outputs": [
387+
{
388+
"data": {
389+
"text/plain": [
390+
"'Action;Adventure;Role Playing (RPG)'"
391+
]
392+
},
393+
"execution_count": 36,
394+
"metadata": {},
395+
"output_type": "execute_result"
396+
}
397+
],
398+
"source": [
399+
"test = \"Action;Eventyr;Rollespil (RPG)\"\n",
400+
"is_english(test)"
401+
]
402+
},
366403
{
367404
"cell_type": "markdown",
368405
"metadata": {},

0 commit comments

Comments
 (0)