Skip to content

Commit f4aef47

Browse files
authored
Create TitanicSurvival.py
1 parent 995229b commit f4aef47

File tree

1 file changed

+66
-0
lines changed

1 file changed

+66
-0
lines changed

TitanicSurvival.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
5+
# Print the full path of every file found under the Kaggle input directory.
# os.walk yields nothing when the directory does not exist, so this is a
# no-op outside Kaggle.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# This import was fused onto the end of the print() line above, which is a
# syntax error. It is kept on its own line so no import is lost.
import pandas as pd
8+
9+
# Load the Titanic passenger table from the Kaggle dataset mount.
titanic_df = pd.read_csv('/kaggle/input/titanic-datasets/titanic.csv')

# A bare expression is only rendered as the last statement of a notebook
# cell; in a plain script its value is discarded. Print each overview
# explicitly so the output is visible however the file is run.
print(titanic_df.head(10))
titanic_df.info()  # info() writes its report to stdout itself
print(titanic_df.describe())
print(titanic_df.columns)

# Missing values: per column, then for 'Age' on its own.
print(titanic_df.isnull().sum())
print(titanic_df['Age'].isnull().sum())

print(titanic_df['Sex'].unique())
print(titanic_df.tail())

# Show each overview figure as soon as it is created. Leaving them open
# would let later plotting calls that target the current axes draw on top
# of these figures instead of starting a fresh one.
titanic_df.boxplot()
plt.show()
titanic_df.hist()
plt.show()
21+
# Overall survival rate. count() only returns the number of non-null rows;
# the mean of the flag is the fraction of passengers who survived.
# NOTE(review): assumes 'Survived' is coded 0/1 - confirm against the CSV.
survival_rate = titanic_df['Survived'].mean()
print(survival_rate)
23+
24+
import matplotlib.pyplot as plt
25+
26+
# Survival rate per passenger class. The chart title and y-label promise a
# rate, so aggregate with mean(); count() would plot the number of
# passengers in each class instead.
# NOTE(review): assumes 'Survived' is coded 0/1 - confirm against the CSV.
survival_by_class = titanic_df.groupby('Pclass')['Survived'].mean()
survival_by_class.plot(kind='bar', color='green', edgecolor='black')
plt.title('Survival Rate by Passenger Class')
plt.xlabel('Passenger Class')
plt.ylabel('Survival Rate')
plt.show()
print(survival_by_class)
33+
34+
# Histogram of passenger ages. Missing ages are dropped before plotting so
# NaN values never reach the binning step; this matches the dropna() used
# by the seaborn age histogram later in the script.
plt.hist(titanic_df['Age'].dropna(), bins=20, color='green', edgecolor='black')
plt.title('Age Distribution')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()
39+
40+
import seaborn as sns
41+
42+
# Age distribution (missing ages removed), 20 bins with a KDE curve on top.
known_ages = titanic_df['Age'].dropna()
sns.histplot(known_ages, bins=20, kde=True)
plt.title('Age Distribution')
plt.show()

# Same histogram, coloured by the value of the 'Survived' column.
sns.histplot(data=titanic_df, x='Age', hue='Survived', bins=20, kde=True)
plt.title('Age Distribution by survival status')
plt.show()
49+
50+
# Pairwise correlations between the float64/int64 columns, drawn as an
# annotated heatmap.
numeric_columns = titanic_df.select_dtypes(include=['float64', 'int64']).columns
correlation_matrix = titanic_df.loc[:, numeric_columns].corr()

sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    linewidths=0.5,
)
plt.title('Correlation Matrix')
plt.show()
57+
58+
# Number of passengers for each value of 'Sex'.
sns.countplot(data=titanic_df, x='Sex')
plt.title('Gender Distribution')
plt.show()

# 'Survived' aggregated per value of 'Sex' (seaborn's default estimator).
sns.barplot(data=titanic_df, x='Sex', y='Survived')
plt.title('Survival Rate by Gender')
plt.show()
65+
66+

0 commit comments

Comments
 (0)