1
1
export Titanic
2
+
2
3
"""
3
- Titanic Dataset
4
+ Titanic Dataset.
4
5
5
6
The titanic and titanic2 data frames describe the survival status of individual passengers on the Titanic.
6
7
@@ -24,12 +25,6 @@ using the Encyclopedia Titanica and created a new dataset called titanic3.
24
25
These datasets reflect the state of data available as of 2 August 1999.
25
26
Some duplicate passengers have been dropped, many errors corrected, many missing ages filled in, and new variables created.
26
27
27
- # Interface
28
-
29
- - [`Titanic.features`](@ref)
30
- - [`Titanic.targets`](@ref)
31
- - [`Titanic.feature_names`](@ref)
32
-
33
28
DATASET specs
34
29
35
30
NAME: titanic3
@@ -63,8 +58,6 @@ body Body Identification Number
63
58
home.dest Home/Destination
64
59
65
60
66
-
67
-
68
61
SPECIAL NOTES
69
62
70
63
Pclass is a proxy for socio-economic status (SES) 1st ~ Upper; 2nd ~ Middle; 3rd ~ Lower
@@ -94,6 +87,12 @@ attach (titanic3)
94
87
plsmo (age, survived, group=sex, datadensity=T)
95
88
# or group=pclass plot (naclus (titanic3)) # study patterns of missing values summary (survived ~ age + sex + pclass + sibsp + parch, data=titanic3)
96
89
90
+
91
+ # Interface
92
+
93
+ - [`Titanic.features`](@ref)
94
+ - [`Titanic.targets`](@ref)
95
+ - [`Titanic.feature_names`](@ref)
97
96
"""
98
97
module Titanic
99
98
@@ -116,45 +115,45 @@ julia> using MLDatasets: Titanic
116
115
julia> target = Titanic.targets();
117
116
118
117
julia> summary(target)
119
- "1×891 Matrix{Float64}"
120
-
121
- """
118
+ "1×891 Matrix{Any}"
122
119
120
+ julia> target[1]
121
+ 0
122
+ ```
123
+ """
123
124
function targets(; dir = nothing)
    # NOTE(review): `dir` is accepted for interface compatibility but is not
    # used here; the data is always read from `DATA`.
    titanic_data = readdlm(DATA, ',')

    # Skip the first row (presumably the CSV header) and take column 2, which
    # holds the target values.
    target_column = titanic_data[2:end, 2]

    # Return the targets as a single-row matrix (1 × number of examples).
    # The column count is derived from the data instead of being hard-coded.
    return reshape(target_column, 1, :)
end
127
128
128
129
"""
129
130
feature_names()
130
131
131
132
Return the names of the features provided in the dataset.
132
133
"""
133
-
134
134
function feature_names()
    # One name per feature, in the order documented for `features()`.
    # The names carry no surrounding whitespace.
    return [
        "PassengerId",
        "Pclass",
        "Name",
        "Sex",
        "Age",
        "SibSp",
        "Parch",
        "Ticket",
        "Fare",
        "Cabin",
        "Embarked",
    ]
end
137
137
138
138
"""
139
139
features()
140
140
141
- Return the features of the Boston Housing dataset. This is a 13x506 Matrix of Float64 datatypes.
142
- The values are in the order ["crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio","b","lstat "].
143
- It has 506 examples.
141
+ Return the features of the Titanic dataset. This is an 11x891 Matrix containing both String and Float datatypes.
142
+ The values are in the order ["PassengerId", "Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked"].
143
+ It has 891 examples.
144
144
145
145
```jldoctest
146
- julia> using MLDatasets: BostonHousing
146
+ julia> using MLDatasets: Titanic
147
147
148
- julia> features = BostonHousing .features();
148
+ julia> features = Titanic .features();
149
149
150
150
julia> summary(features)
151
- "13×506 Matrix{Float64 }"
151
+ "11×891 Matrix{Any }"
152
152
```
153
153
"""
154
-
155
154
function features()
    titanic_data = readdlm(DATA, ',')

    # Skip the first row (presumably the CSV header) and leave out column 2,
    # which `targets()` returns. The result has one row per example and one
    # column per feature (columns 1 and 3:12 of the file).
    examples_by_feature = hcat(titanic_data[2:end, 1], titanic_data[2:end, 3:12])

    # Transpose to features × examples. `reshape` must not be used for this:
    # it only re-reads the column-major buffer with new dimensions, which
    # would mix values from different examples within each output column.
    # `permutedims` is non-recursive, so it also works on the String entries.
    return permutedims(examples_by_feature)
end
159
158
160
159
end # module
0 commit comments