|
1 | 1 | const defdir = joinpath(dirname(@__FILE__), "..", "datasets") |
2 | 2 |
|
3 | 3 | function getmovielensdata(dir) |
4 | | - mkpath(dir) |
5 | | - path = download("http://files.grouplens.org/datasets/movielens/ml-100k.zip") |
6 | | - run(unpack_cmd(path,dir,".zip", "")) |
| 4 | + mkpath(dir) |
| 5 | + path = download("http://files.grouplens.org/datasets/movielens/ml-100k.zip") |
| 6 | + run(unpack_cmd(path,dir,".zip", "")) |
7 | 7 | end |
8 | 8 |
|
9 | 9 | function getmovielensdata1m(dir) |
10 | | - mkpath(dir) |
11 | | - path = download("http://files.grouplens.org/datasets/movielens/ml-1m.zip") |
12 | | - run(unpack_cmd(path,dir,".zip", "")) |
| 10 | + mkpath(dir) |
| 11 | + path = download("http://files.grouplens.org/datasets/movielens/ml-1m.zip") |
| 12 | + run(unpack_cmd(path,dir,".zip", "")) |
13 | 13 | end |
14 | 14 |
|
15 | 15 | """ |
16 | | - MovieLens()::Persa.TimeCFDataset |
| 16 | + MovieLens()::Persa.Dataset |
17 | 17 |
|
18 | 18 | Return MovieLens 100k dataset. |
19 | 19 | """ |
20 | | -function MovieLens()::Persa.TimeCFDataset |
21 | | - file = "$(defdir)/ml-100k/u.data" |
| 20 | +function MovieLens()::Persa.Dataset |
| 21 | + filename = "$(defdir)/ml-100k/u.data" |
22 | 22 |
|
23 | | - isfile(file) || getmovielensdata(defdir) |
| 23 | + isfile(filename) || getmovielensdata(defdir) |
24 | 24 |
|
25 | | - file = readtable(file, separator = ' ', header = false) |
| 25 | + file = CSV.read(filename, delim = ' ', |
| 26 | + header = [:user, :item, :rating, :timestamp], |
| 27 | + allowmissing = :none) |
26 | 28 |
|
27 | | - df = DataFrame() |
28 | | - |
29 | | - df[:user] = file[:,1] |
30 | | - df[:item] = file[:,2] |
31 | | - df[:rating] = file[:,3] |
32 | | - df[:timestamp] = file[:,4] |
33 | | - |
34 | | - return Persa.Dataset(df) |
| 29 | + return Persa.Dataset(file) |
35 | 30 | end |
36 | 31 |
|
37 | 32 | """ |
38 | | - MovieLens1M()::Persa.TimeCFDataset |
| 33 | + MovieLens1M()::Persa.Dataset |
39 | 34 |
|
40 | 35 | Return MovieLens 1M dataset. |
41 | 36 | """ |
42 | | -function MovieLens1M()::Persa.TimeCFDataset |
43 | | - file = "$(defdir)/ml-1m/ratings.dat" |
| 37 | +function MovieLens1M()::Persa.Dataset |
| 38 | + filename = "$(defdir)/ml-1m/ratings.dat" |
44 | 39 |
|
45 | | - isfile(file) || getmovielensdata1m(defdir) |
| 40 | + isfile(filename) || getmovielensdata1m(defdir) |
46 | 41 |
|
47 | | - file = readtable(file, separator = ':', header = false) |
| 42 | + file = CSV.read(filename, delim = "::", |
| 43 | + header = [:user, :item, :rating, :timestamp], |
| 44 | + allowmissing = :all) |
48 | 45 |
|
49 | | - df = DataFrame() |
| 46 | + df = DataFrame() |
50 | 47 |
|
51 | | - df[:user] = file[:,1] |
52 | | - df[:item] = labelencode(labelmap(file[:,3]), file[:,3]) |
53 | | - df[:rating] = file[:,5] |
54 | | - df[:timestamp] = file[:,7] |
| 48 | + df[:user] = convert(Array{Int}, file[:user]) |
| 49 | + df[:item] = convert(Array{Int}, file[:item]) |
| 50 | + df[:item] = labelencode(labelmap(df[:item]), df[:item]) |
| 51 | + df[:rating] = convert(Array{Int}, file[:rating]) |
| 52 | + df[:timestamp] = convert(Array{Int}, file[:timestamp]) |
55 | 53 |
|
56 | | - return Persa.Dataset(df) |
| 54 | + return Persa.Dataset(df) |
57 | 55 | end |
0 commit comments