Skip to content

Commit 1863271

Browse files
leestott and Copilot authored
Update 2-Working-With-Data/08-data-preparation/notebook.ipynb
Co-authored-by: Copilot <[email protected]>
1 parent 37aa09a commit 1863271

File tree

1 file changed

+18
-13
lines changed

1 file changed

+18
-13
lines changed

2-Working-With-Data/08-data-preparation/notebook.ipynb

Lines changed: 18 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -3815,24 +3815,29 @@
38153815
"metadata": {},
38163816
"outputs": [],
38173817
"source": [
3818-
"# rapidfuzz was already imported in an earlier cell\n",
3819-
"from rapidfuzz import process, fuzz\n",
3820-
"\n",
3821-
"\n",
3818+
"try:\n",
3819+
" from rapidfuzz import process, fuzz\n",
3820+
"except ImportError:\n",
3821+
" print(\"rapidfuzz is not installed. Please install it with 'pip install rapidfuzz' to use fuzzy matching.\")\n",
3822+
" process = None\n",
3823+
" fuzz = None\n",
38223824
"\n",
38233825
"# Get unique countries\n",
38243826
"unique_countries = dirty_data['country'].unique()\n",
38253827
"\n",
38263828
"# For each country, find similar matches\n",
3827-
"print(\"Finding similar country names (similarity > 70%):\")\n",
3828-
"for country in unique_countries:\n",
3829-
" matches = process.extract(country, unique_countries, scorer=fuzz.ratio, limit=3)\n",
3830-
" # Filter matches with similarity > 70 and not identical\n",
3831-
" similar = [m for m in matches if m[1] > 70 and m[0] != country]\n",
3832-
" if similar:\n",
3833-
" print(f\"\\n'{country}' is similar to:\")\n",
3834-
" for match, score, _ in similar:\n",
3835-
" print(f\" - '{match}' (similarity: {score}%)\")"
3829+
"if process is not None and fuzz is not None:\n",
3830+
" print(\"Finding similar country names (similarity > 70%):\")\n",
3831+
" for country in unique_countries:\n",
3832+
" matches = process.extract(country, unique_countries, scorer=fuzz.ratio, limit=3)\n",
3833+
" # Filter matches with similarity > 70 and not identical\n",
3834+
" similar = [m for m in matches if m[1] > 70 and m[0] != country]\n",
3835+
" if similar:\n",
3836+
" print(f\"\\n'{country}' is similar to:\")\n",
3837+
" for match, score, _ in similar:\n",
3838+
" print(f\" - '{match}' (similarity: {score}%)\")\n",
3839+
"else:\n",
3840+
" print(\"Skipping fuzzy matching because rapidfuzz is not available.\")"
38363841
]
38373842
},
38383843
{

0 commit comments

Comments
 (0)