|
3909 | 3909 | "metadata": {}, |
3910 | 3910 | "outputs": [], |
3911 | 3911 | "source": [ |
3912 | | - "from scipy import stats\n", |
3913 | | - "\n", |
3914 | | - "# Calculate Z-scores for age\n", |
3915 | | - "dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", |
| 3912 | + "try:\n", |
| 3913 | + " from scipy import stats\n", |
| 3914 | + "except ImportError:\n", |
| 3915 | + " print(\"scipy is required for Z-score calculation. Please install it with 'pip install scipy' and rerun this cell.\")\n", |
| 3916 | + "else:\n", |
| 3917 | + " # Calculate Z-scores for age\n", |
| 3918 | + " dirty_data['age_zscore'] = np.abs(stats.zscore(dirty_data['age']))\n", |
3916 | 3919 | "\n", |
3917 | | - "# Typically, Z-score > 3 indicates an outlier\n", |
3918 | | - "print(\"Rows with age Z-score > 3:\")\n", |
3919 | | - "zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n", |
3920 | | - "print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n", |
| 3920 | + " # Typically, Z-score > 3 indicates an outlier\n", |
| 3921 | + " print(\"Rows with age Z-score > 3:\")\n", |
| 3922 | + " zscore_outliers = dirty_data[dirty_data['age_zscore'] > 3]\n", |
| 3923 | + " print(zscore_outliers[['customer_id', 'name', 'age', 'age_zscore']])\n", |
3921 | 3924 | "\n", |
3922 | | - "# Clean up the temporary column\n", |
3923 | | - "dirty_data = dirty_data.drop('age_zscore', axis=1)" |
| 3925 | + " # Clean up the temporary column\n", |
| 3926 | + " dirty_data = dirty_data.drop('age_zscore', axis=1)" |
3924 | 3927 | ] |
3925 | 3928 | }, |
3926 | 3929 | { |
|
0 commit comments