Skip to content

Commit 8b3068a

Browse files
Sorted imports with isort & Autoformat code with Black
1 parent ad07647 commit 8b3068a

File tree

10 files changed

+467
-380
lines changed

10 files changed

+467
-380
lines changed

dag/flows/healthcheck.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
11
import platform
2-
import prefect
3-
from prefect import task, flow, get_run_logger
42
import sys
53

4+
import prefect
5+
from prefect import flow, get_run_logger, task
6+
67

78
@task
89
def log_platform_info():

get_version.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import warnings
2+
23
import versioneer
34

45
if __name__ == "__main__":

notebooks/data_exploration.ipynb

Lines changed: 45 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -339,9 +339,9 @@
339339
}
340340
],
341341
"source": [
342-
"with open(os.path.join(Path.sql_queries, 'get_all_game_data.sql'), \"r\") as f:\n",
342+
"with open(os.path.join(Path.sql_queries, \"get_all_game_data.sql\"), \"r\") as f:\n",
343343
" query = text(f.read())\n",
344-
" \n",
344+
"\n",
345345
"\n",
346346
"with get_db() as db:\n",
347347
" result = db.execute(query)\n",
@@ -371,7 +371,7 @@
371371
}
372372
],
373373
"source": [
374-
"game_data['description'].iloc[10000-4-1]"
374+
"game_data[\"description\"].iloc[10000 - 4 - 1]"
375375
]
376376
},
377377
{
@@ -396,6 +396,7 @@
396396
"source": [
397397
"from fuzzywuzzy import process\n",
398398
"\n",
399+
"\n",
399400
"def get_unique(series):\n",
400401
" \"\"\"\n",
401402
" Returns a set of unique values from a series of strings.\n",
@@ -407,7 +408,7 @@
407408
" set: A set of unique values extracted from the series.\n",
408409
"\n",
409410
" \"\"\"\n",
410-
" return set(list(itertools.chain(*series.apply(lambda x: [c for c in x.split(';')]))))"
411+
" return set(list(itertools.chain(*series.apply(lambda x: [c for c in x.split(\";\")]))))"
411412
]
412413
},
413414
{
@@ -461,7 +462,7 @@
461462
}
462463
],
463464
"source": [
464-
"geners = get_unique(game_data['genres'])\n",
465+
"geners = get_unique(game_data[\"genres\"])\n",
465466
"geners"
466467
]
467468
},
@@ -494,28 +495,30 @@
494495
"def standardize_genre(value, genre_list):\n",
495496
" # Convert to lowercase for consistent comparison\n",
496497
" value_lower = value.lower()\n",
497-
" \n",
498+
"\n",
498499
" # Define common patterns\n",
499-
" if 'rpg' in value_lower or 'role playing' in value_lower or 'role' in value_lower:\n",
500-
" return 'RPG'\n",
501-
" if 'simulation' in value_lower or 'simulators' in value_lower:\n",
502-
" return 'Simulation'\n",
503-
" if 'adventure' in value_lower:\n",
504-
" return 'Adventure'\n",
500+
" if \"rpg\" in value_lower or \"role playing\" in value_lower or \"role\" in value_lower:\n",
501+
" return \"RPG\"\n",
502+
" if \"simulation\" in value_lower or \"simulators\" in value_lower:\n",
503+
" return \"Simulation\"\n",
504+
" if \"adventure\" in value_lower:\n",
505+
" return \"Adventure\"\n",
506+
"\n",
505507
"\n",
506508
"# Function to standardize multiple genres\n",
507509
"def standardize_multiple_genres(genres_str, genre_list):\n",
508-
" genres = genres_str.split(';')\n",
510+
" genres = genres_str.split(\";\")\n",
509511
" standardized_genres = [standardize_genre(genre.strip(), genre_list) for genre in genres]\n",
510-
" return ';'.join(sorted(set(standardized_genres))) # Use sorted(set()) to remove duplicates and sort\n",
511-
" \n",
512+
" return \";\".join(sorted(set(standardized_genres))) # Use sorted(set()) to remove duplicates and sort\n",
513+
"\n",
512514
" # Find the best match from the list of unique genres\n",
513515
" match, score = process.extractOne(value, genre_list)\n",
514516
" return match\n",
515517
"\n",
518+
"\n",
516519
"# Apply the standardization function to the Genres column\n",
517-
"game_data['genres'] = game_data['genres'].apply(lambda x: standardize_multiple_genres(x, geners))\n",
518-
"geners = get_unique(game_data['genres'])\n",
520+
"game_data[\"genres\"] = game_data[\"genres\"].apply(lambda x: standardize_multiple_genres(x, geners))\n",
521+
"geners = get_unique(game_data[\"genres\"])\n",
519522
"geners"
520523
]
521524
},
@@ -615,7 +618,7 @@
615618
}
616619
],
617620
"source": [
618-
"categories = get_unique(game_data['categories'])\n",
621+
"categories = get_unique(game_data[\"categories\"])\n",
619622
"categories"
620623
]
621624
},
@@ -643,21 +646,22 @@
643646
" - score: The calculated rating score as a percentage.\n",
644647
"\n",
645648
" \"\"\"\n",
646-
" pos = row['positive_ratings']\n",
647-
" neg = row['negative_ratings']\n",
649+
" pos = row[\"positive_ratings\"]\n",
650+
" neg = row[\"negative_ratings\"]\n",
648651
"\n",
649652
" total_reviews = pos + neg\n",
650-
" \n",
653+
"\n",
651654
" if total_reviews > 0:\n",
652655
" average = pos / total_reviews\n",
653-
" score = average - (average * 0.5) * 2**(-math.log10(total_reviews + 1))\n",
656+
" score = average - (average * 0.5) * 2 ** (-math.log10(total_reviews + 1))\n",
654657
" return score * 100\n",
655658
" else:\n",
656659
" return 0.0\n",
657660
"\n",
658-
"game_data['total_ratings'] = game_data['positive_ratings'] + game_data['negative_ratings']\n",
659-
"game_data['review_score'] = game_data['positive_ratings'] / game_data['total_ratings']\n",
660-
"game_data['rating'] = game_data.apply(calc_rating, axis=1)"
661+
"\n",
662+
"game_data[\"total_ratings\"] = game_data[\"positive_ratings\"] + game_data[\"negative_ratings\"]\n",
663+
"game_data[\"review_score\"] = game_data[\"positive_ratings\"] / game_data[\"total_ratings\"]\n",
664+
"game_data[\"rating\"] = game_data.apply(calc_rating, axis=1)"
661665
]
662666
},
663667
{
@@ -996,24 +1000,25 @@
9961000
"source": [
9971001
"def categorize_year(year):\n",
9981002
" if year < 2020:\n",
999-
" return 'Before 2020'\n",
1003+
" return \"Before 2020\"\n",
10001004
" elif 2020 <= year <= 2022:\n",
1001-
" return '2020-2022'\n",
1005+
" return \"2020-2022\"\n",
10021006
" else:\n",
1003-
" return 'After 2022'\n",
1007+
" return \"After 2022\"\n",
1008+
"\n",
10041009
"\n",
1005-
"game_data['year'] = game_data['year'].fillna(0).astype(int) \n",
1006-
"game_data['Region'] = game_data['year'].apply(categorize_year)\n",
1010+
"game_data[\"year\"] = game_data[\"year\"].fillna(0).astype(int)\n",
1011+
"game_data[\"Region\"] = game_data[\"year\"].apply(categorize_year)\n",
10071012
"\n",
10081013
"# Calculate the frequency of each year\n",
1009-
"yearly_counts = game_data.groupby(['Region', 'year']).size().reset_index(name='Frequency')\n",
1014+
"yearly_counts = game_data.groupby([\"Region\", \"year\"]).size().reset_index(name=\"Frequency\")\n",
10101015
"\n",
10111016
"# Plotting using Seaborn\n",
10121017
"plt.figure(figsize=(12, 6))\n",
1013-
"sns.barplot(data=yearly_counts, x='year', y='Frequency', hue='Region')\n",
1014-
"plt.title('Game Release by Year')\n",
1015-
"plt.xlabel('Year')\n",
1016-
"plt.ylabel('Frequency')\n",
1018+
"sns.barplot(data=yearly_counts, x=\"year\", y=\"Frequency\", hue=\"Region\")\n",
1019+
"plt.title(\"Game Release by Year\")\n",
1020+
"plt.xlabel(\"Year\")\n",
1021+
"plt.ylabel(\"Frequency\")\n",
10171022
"plt.xticks(rotation=45)\n",
10181023
"plt.show()"
10191024
]
@@ -1031,12 +1036,12 @@
10311036
"metadata": {},
10321037
"outputs": [],
10331038
"source": [
1034-
"tags = col_row_df['tags']\n",
1039+
"tags = col_row_df[\"tags\"]\n",
10351040
"parsed_tags = tags.apply(lambda x: literal_eval(x) if x else {})\n",
10361041
"\n",
10371042
"unique_tags = set(itertools.chain(*parsed_tags))\n",
10381043
"\n",
1039-
"print('Number of unique tags:', len(unique_tags))\n",
1044+
"print(\"Number of unique tags:\", len(unique_tags))\n",
10401045
"\n",
10411046
"# Create a DataFrame with 15 columns and 30 rows\n",
10421047
"num_columns = 15\n",
@@ -1045,7 +1050,7 @@
10451050
"unique_tags = sorted(list(unique_tags))\n",
10461051
"\n",
10471052
"# Reshape the list into the desired DataFrame shape\n",
1048-
"ut = [unique_tags[i * num_columns:(i + 1) * num_columns] for i in range(num_rows)]\n",
1053+
"ut = [unique_tags[i * num_columns : (i + 1) * num_columns] for i in range(num_rows)]\n",
10491054
"\n",
10501055
"# Create the DataFrame\n",
10511056
"utdf = pd.DataFrame(ut)\n",
@@ -1079,8 +1084,8 @@
10791084
"metadata": {},
10801085
"outputs": [],
10811086
"source": [
1082-
"langs = col_row_df['languages']\n",
1083-
"langs = langs.apply(lambda x: x.split(', ') if x else [])\n",
1087+
"langs = col_row_df[\"languages\"]\n",
1088+
"langs = langs.apply(lambda x: x.split(\", \") if x else [])\n",
10841089
"\n",
10851090
"langc = Counter()\n",
10861091
"\n",

0 commit comments

Comments
 (0)