Skip to content

Commit e8ec56e

Browse files
Add Faust Dataset (#160)
* Add basic dataset * Add Docs * Add manual tests * Add review changes * Break long lines * Update src/datasets/meshes/faust.jl * Update src/datasets/meshes/faust.jl * Update src/datasets/meshes/faust.jl * add branching * Add proper spaces for julia-repl * Fix test example * remove backslash and join lines Co-authored-by: Carlo Lucibello <[email protected]>
1 parent c859843 commit e8ec56e

File tree

8 files changed

+266
-17
lines changed

8 files changed

+266
-17
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
2020
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
2121
NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
2222
Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
23+
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
2324
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
2425
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
2526
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ makedocs(
2424
"Home" => "index.md",
2525
"Datasets" => Any[
2626
"Graphs" => "datasets/graphs.md",
27+
"Meshes" => "datasets/meshes.md",
2728
"Miscellaneous" => "datasets/misc.md",
2829
"Text" => "datasets/text.md",
2930
"Vision" => "datasets/vision.md",

docs/src/datasets/meshes.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Mesh Datasets
2+
3+
Mesh datasets contain data in the form of `Meshes.Mesh`. See [Meshes.jl](https://juliageometry.github.io/Meshes.jl/stable/) for a better understanding of meshes.
4+
5+
## Index
6+
7+
```@index
8+
Pages = ["meshes.md"]
9+
```
10+
11+
## Documentation
12+
13+
```@docs
14+
FAUST
15+
```

src/MLDatasets.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ using Tables
66
using DataDeps
77
import MLUtils
88
using MLUtils: getobs, numobs, AbstractDataContainer
9+
using Printf
910
using Glob
1011
using DelimitedFiles: readdlm
1112
using FileIO
@@ -124,6 +125,11 @@ export Reddit
124125
include("datasets/graphs/tudataset.jl")
125126
export TUDataset
126127

128+
# Meshes
129+
130+
include("datasets/meshes/faust.jl")
131+
export FAUST
132+
127133
function __init__()
128134
# TODO automatically find and execute __init__xxx functions
129135

@@ -154,6 +160,9 @@ function __init__()
154160
__init__mnist()
155161
__init__omniglot()
156162
__init__svhn2()
163+
164+
# mesh
165+
__init__faust()
157166
end
158167

159168
end #module

src/abstract_datasets.jl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,22 @@ end
1919

2020
# Multi-line (REPL) display of a dataset: a header with the type name followed by
# one `name => summary` row per field. Fields whose name starts with an underscore
# are treated as private and are not printed.
function Base.show(io::IO, ::MIME"text/plain", d::D) where D <: AbstractDataset
    recur_io = IOContext(io, :compact => false)

    # `D.name.name` is the bare type name, so type parameters are not printed.
    print(io, "dataset $(D.name.name):")

    visible = [f for f in fieldnames(D) if !startswith(string(f), "_")]
    # Pad every field name to the longest visible one so the arrows line up.
    # Guard the empty case: a type without visible fields must still be showable.
    width = isempty(visible) ? 0 : maximum(f -> length(string(f)), visible)
    for f in visible
        fstring = leftalign(string(f), width)
        print(recur_io, "\n $fstring => ")
        print(recur_io, string(_summary(getfield(d, f))))
    end
    return nothing
end
3535

3636
function leftalign(s::AbstractString, n::Int)
37-
m = length(s)
37+
m = length(s)
3838
if m > n
3939
return s[1:n]
4040
else
@@ -60,7 +60,7 @@ a `features` and a `targets` fields.
6060
abstract type SupervisedDataset <: AbstractDataset end
6161

6262

63-
# Number of observations in a supervised dataset. Table-like features are counted
# with `numobs_table`; otherwise `numobs` is applied to the (features, targets) pair.
function Base.length(d::SupervisedDataset)
    if Tables.istable(d.features)
        return numobs_table(d.features)
    end
    return numobs((d.features, d.targets))
end
6565

6666

@@ -69,7 +69,7 @@ Base.getindex(d::SupervisedDataset, ::Colon) = Tables.istable(d.features) ?
6969
(features = d.features, targets=d.targets) :
7070
getobs((; d.features, d.targets))
7171

72-
# Observation(s) `i` of a supervised dataset as a named tuple with `features` and
# `targets`. Table-like data goes through `getobs_table`, everything else through
# `getobs` on the named tuple of both fields.
function Base.getindex(d::SupervisedDataset, i)
    Tables.istable(d.features) || return getobs((; d.features, d.targets), i)
    return (features = getobs_table(d.features, i), targets = getobs_table(d.targets, i))
end
7575

@@ -99,13 +99,13 @@ const ARGUMENTS_SUPERVISED_TABLE = """
9999

100100
# Markdown description of the fields of table-style supervised datasets
# (presumably interpolated into dataset docstrings; verify against the users of
# this constant). The `features`/`targets` wording now agrees with the `dataframe`
# entry: dataframes are produced when `as_df=true`, plain arrays otherwise.
const FIELDS_SUPERVISED_TABLE = """
- `metadata`: A dictionary containing additional information on the dataset.
- `features`: The data features. A dataframe if `as_df=true`, otherwise an array.
- `targets`: The targets for supervised learning. A dataframe if `as_df=true`, otherwise an array.
- `dataframe`: A dataframe containing both `features` and `targets`. It is `nothing` if `as_df=false`.
"""
106106

107107
const METHODS_SUPERVISED_TABLE = """
108-
- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
108+
- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
109109
- `dataset[:]`: Return all observations as a named tuple of features and targets.
110110
- `length(dataset)`: Number of observations.
111111
"""
@@ -119,12 +119,12 @@ const ARGUMENTS_SUPERVISED_ARRAY = """
119119

120120
const FIELDS_SUPERVISED_ARRAY = """
121121
- `metadata`: A dictionary containing additional information on the dataset.
122-
- `features`: An array storing the data features.
122+
- `features`: An array storing the data features.
123123
- `targets`: An array storing the targets for supervised learning.
124124
"""
125125

126126
const METHODS_SUPERVISED_ARRAY = """
127-
- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
127+
- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
128128
- `dataset[:]`: Return all observations as a named tuple of features and targets.
129129
- `length(dataset)`: Number of observations.
130130
"""

src/datasets/meshes/faust.jl

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
# Register the "MPI-FAUST" data dependency as a `ManualDataDep`, whose message
# carries the dataset name and the website it can be obtained from.
function __init__faust()
    depname = "MPI-FAUST"
    website = "http://faust.is.tue.mpg.de/"
    message = """
        Dataset: $depname.
        Website: $website
        """
    return register(ManualDataDep(depname, message))
end
13+
14+
"""
15+
FAUST(split=:train; dir=nothing)
16+
17+
The MPI FAUST dataset (2014).
18+
19+
FAUST contains 300 real, high-resolution human scans of 10 different subjects in 30 different poses,
20+
with automatically computed ground-truth correspondences.
21+
22+
Each scan is a high-resolution, triangulated, non-watertight mesh acquired with a 3D multi-stereo system.
23+
24+
FAUST is subdivided into a training and a test set. The training set includes 100 scans (10 per subject)
25+
with their corresponding ground-truth alignments. The test set includes 200 scans. The FAUST benchmark defines
26+
100 preselected scan pairs, partitioned into two classes – 60 requiring intra-subject matching,
27+
40 requiring inter-subject matching.
28+
29+
The dataset must be downloaded manually from the [website](http://faust.is.tue.mpg.de/)
30+
and extracted in the correct location. For information about where to place the dataset, refer to the example section.
31+
32+
33+
# Dataset Variables
34+
35+
- `scans`: Vector of non-watertight scans in the form of `Mesh`.
36+
- `registrations`: Vector of registrations corresponding to each scan in `scans`. `registrations` like `scans` are also in the form of `Mesh`.
37+
- `labels`: For each scan in the training set, we provide the boolean Vector of length equal to the number of vertices in the corresponding scan. It represents which vertices were reliably registered by the corresponding registration.
38+
- `metadata`: A dictionary containing additional information on the dataset. Currently only `:test` split has metadata containing information about the registrations required for the inter and intra challenge proposed by the author.
39+
40+
# Examples
41+
42+
## Loading the dataset
43+
44+
```julia-repl
45+
julia> using MLDatasets
46+
47+
julia> dataset = FAUST()
48+
[ Info: This program requested access to the data dependency MPI-FAUST
49+
[ Info: It could not be found on your system. It requires manual installation.
50+
┌ Info: Please install it to one of the directories in the DataDeps load path: /home/user/.julia/packages/DataDeps/EDWdQ/deps/data/MPI-FAUST,
51+
│ /home/user/.julia/datadeps/MPI-FAUST,
52+
│ /home/user/.julia/juliaup/julia-1.7.3+0.x86/local/share/julia/datadeps/MPI-FAUST,
53+
│ /home/user/.julia/juliaup/julia-1.7.3+0.x86/share/julia/datadeps/MPI-FAUST,
54+
│ /home/user/datadeps/MPI-FAUST,
55+
│ /scratch/datadeps/MPI-FAUST,
56+
│ /staging/datadeps/MPI-FAUST,
57+
│ /usr/share/datadeps/MPI-FAUST,
58+
└ or /usr/local/share/datadeps/MPI-FAUST
59+
[ Info: by following the instructions:
60+
┌ Info: Dataset: MPI-FAUST.
61+
└ Website: http://faust.is.tue.mpg.de/
62+
Once installed please enter 'y' reattempt loading, or 'a' to abort
63+
[y/a]
64+
```
65+
Now download and extract the dataset into one of the given locations. For Unix-like systems, an example command is
66+
```bash
67+
unzip -q <path-to-file>/<filename>.zip -d ~/.julia/datadeps
68+
```
69+
The corresponding folder tree should look like
70+
```
71+
├── test
72+
│   ├── challenge_pairs
73+
│   └── scans
74+
└── training
75+
├── ground_truth_vertices
76+
├── registrations
77+
└── scans
78+
```
79+
Press `y` to re-attempt loading.
80+
```julia-repl
81+
dataset FAUST:
82+
scans => 100-element Vector{Any}
83+
registrations => 100-element Vector{Any}
84+
labels => 100-element Vector{Vector{Bool}}
85+
metadata => Dict{String, Any} with 0 entries
86+
```
87+
88+
## Load train and test split
89+
90+
```julia-repl
91+
julia> train_faust = FAUST(:train)
92+
dataset FAUST:
93+
scans => 100-element Vector{Any}
94+
registrations => 100-element Vector{Any}
95+
labels => 100-element Vector{Vector{Bool}}
96+
metadata => Dict{String, Any} with 0 entries
97+
98+
julia> test_faust = FAUST(:test)
99+
dataset FAUST:
100+
scans => 200-element Vector{Any}
101+
registrations => 0-element Vector{Any}
102+
labels => 0-element Vector{Vector{Bool}}
103+
metadata => Dict{String, Any} with 2 entries
104+
```
105+
106+
## Scan, registrations and ground-truth
107+
108+
```julia-repl
109+
julia> dataset = FAUST(); # defaults to train split
110+
111+
julia> scan = dataset.scans[1] # pick one scan
112+
Mesh{3, Float32, Triangle}:
113+
Triangle(Float32[-0.0045452323, 0.08537669, 0.22134435], Float32[-0.0030340434, 0.08542955, 0.22206494],
114+
Float32[-0.0042151767, 0.08697654, 0.22171047])
115+
Triangle(Float32[-0.05358432, 0.08490027, 0.17748278], Float32[-0.05379858, 0.083174236, 0.17670263],
116+
Float32[-0.052645437, 0.08346437, 0.17816517])
117+
.
118+
.
119+
.
120+
Triangle(Float32[-0.07851, -1.0956081, 0.07093428], Float32[-0.06905176, -1.0986279, 0.07775441],
121+
Float32[-0.069199145, -1.0928112, 0.06812464])
122+
123+
julia> registration = dataset.registrations[1] # The corresponding registration
124+
Mesh{3, Float32, Triangle}:
125+
Triangle(Float32[0.12491254, 0.51199615, 0.29041073], Float32[0.11376736, 0.5156298, 0.3007352],
126+
Float32[0.119374536, 0.50043654, 0.29687837])
127+
Triangle(Float32[0.119374536, 0.50043654, 0.29687837], Float32[0.11376736, 0.5156298, 0.3007352],
128+
Float32[0.10888693, 0.5008964, 0.30557302])
129+
.
130+
.
131+
.
132+
Triangle(Float32[0.033744745, 0.030968456, 0.2359996], Float32[0.058017172, 0.044458304, 0.23422624],
133+
Float32[0.03615713, 0.04858183, 0.23596591])
134+
135+
julia> label = dataset.labels[1] # The ground-truth labels for each vertex in the scan
136+
176387-element Vector{Bool}:
137+
1
138+
1
139+
1
140+
.
141+
.
142+
.
143+
0
144+
0
145+
0
146+
```
147+
148+
# References
149+
150+
1. [MPI Faust Website](http://faust.is.tue.mpg.de/)
151+
152+
2. Bogo, Federica & Romero, Javier & Loper, Matthew & Black, Michael. (2014). FAUST: Dataset
153+
and evaluation for 3D mesh registration. Proceedings of the IEEE Computer Society Conference
154+
on Computer Vision and Pattern Recognition. 10.1109/CVPR.2014.491.
155+
"""
156+
struct FAUST <: AbstractDataset
    # Loaded scan meshes, one entry per scan file of the selected split.
    scans::Vector
    # Loaded registration meshes; the constructor leaves this empty for `:test`.
    registrations::Vector
    # One boolean vector per training scan, read from the ground-truth vertex
    # files; the constructor leaves this empty for `:test`.
    labels::Vector{Vector{Bool}}
    # Additional information; for `:test` the constructor stores the challenge
    # pairs under "Inter_Pairs" and "Intra_Pairs", for `:train` it is empty.
    metadata::Dict{String, Any}
end
162+
163+
# Load one split of the MPI-FAUST dataset from disk.
#
# Arguments
# - `split`: `:train` (default) or `:test`.
# - `dir`: root folder holding the extracted `training` and `test` directories.
#   When `nothing`, the folder is resolved through the "MPI-FAUST" data dependency.
#
# Returns a `FAUST` value. For `:train` it holds 100 scans, 100 registrations and
# 100 label vectors with empty metadata. For `:test` it holds 200 scans, empty
# registrations/labels, and the challenge pairs in the metadata.
#
# Throws an `ArgumentError` if `split` is neither `:train` nor `:test`.
function FAUST(split=:train; dir=nothing)
    # Validate before resolving the data dependency so that an invalid split never
    # triggers the interactive manual-download prompt.
    split in (:train, :test) ||
        throw(ArgumentError("Only train and test splits are present in the dataset."))
    root = isnothing(dir) ? datadep"MPI-FAUST" : dir

    # The element type of the meshes is decided by `load`, so these stay untyped.
    scans = []
    registrations = []
    labels = Vector{Bool}[]
    if split == :train
        training_dir = joinpath(root, "training")
        reg_dir = joinpath(training_dir, "registrations")
        scan_dir = joinpath(training_dir, "scans")
        gt_dir = joinpath(training_dir, "ground_truth_vertices")
        # Files are numbered 000 to 099.
        for i in 0:99
            scan_file = @sprintf("tr_scan_%03d.ply", i)
            reg_file = @sprintf("tr_reg_%03d.ply", i)
            gt_file = @sprintf("tr_gt_%03d.txt", i)
            push!(scans, load(joinpath(scan_dir, scan_file)))
            push!(registrations, load(joinpath(reg_dir, reg_file)))
            # One line per entry; a line equal to "1" maps to `true`, anything
            # else to `false`.
            push!(labels, Bool[line == "1" for line in eachline(joinpath(gt_dir, gt_file))])
        end
        return FAUST(scans, registrations, labels, Dict{String, Any}())
    else
        scan_dir = joinpath(root, "test", "scans")
        # Files are numbered 000 to 199.
        for i in 0:199
            scan_file = @sprintf("test_scan_%03d.ply", i)
            push!(scans, load(joinpath(scan_dir, scan_file)))
        end
        pairs_dir = joinpath(root, "test", "challenge_pairs")
        metadata = Dict{String, Any}(
            "Inter_Pairs" => read_challenge_file(joinpath(pairs_dir, "inter_challenge.txt")),
            "Intra_Pairs" => read_challenge_file(joinpath(pairs_dir, "intra_challenge.txt")),
        )
        return FAUST(scans, registrations, labels, metadata)
    end
end
206+
207+
# Parse a challenge-pairs file into a vector of integer pairs.
#
# Each non-blank line must have the form `<i>_<j>` with two integers separated by
# an underscore. Surrounding whitespace and blank lines are ignored.
#
# Arguments
# - `filename`: path of the file to read.
#
# Returns a `Vector{Tuple{Int, Int}}` with one tuple per non-blank line (empty for
# an empty file).
#
# Throws an `ArgumentError` when a line does not consist of exactly two
# underscore-separated integers.
function read_challenge_file(filename::AbstractString)::Vector{Tuple{Int, Int}}
    pairs = Tuple{Int, Int}[]
    # `eachline` streams the file and closes it when iteration finishes.
    for line in eachline(filename)
        entry = strip(line)
        isempty(entry) && continue
        ids = split(entry, "_")
        length(ids) == 2 || throw(ArgumentError(
            "expected a line of the form `<i>_<j>` in $filename, got \"$line\""))
        push!(pairs, (parse(Int, ids[1]), parse(Int, ids[2])))
    end
    return pairs
end

test/datasets/meshes.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Requires the MPI-FAUST archive to be downloaded and extracted manually.
@testset "MPI-FAUST" begin
    train_data = FAUST()
    test_data = FAUST(:test)

    # `@test` records each check in the test set; `@assert` would abort the whole
    # set on the first failure and is not counted in the summary.
    @test length(train_data.scans) == 100
    @test length(train_data.scans) == length(train_data.registrations)
    @test length(train_data.scans) == length(train_data.labels)
    @test length(test_data.scans) == 200
end

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ no_ci_dataset_tests = [
2828
"datasets/vision/emnist.jl",
2929
"datasets/vision/omniglot.jl",
3030
"datasets/vision/svhn2.jl",
31+
"datasets/meshes.jl"
3132
]
3233

3334
@assert isempty(intersect(dataset_tests, no_ci_dataset_tests))

0 commit comments

Comments
 (0)