Add Faust Dataset (#160)

Dsantra92 · CarloLucibello · web-flow · commit e8ec56e86e6e · 2022-09-02T00:04:55.000+02:00
* Add basic dataset

* Add Docs

* Add manual tests

* Add review changes

* Break long lines

* Update src/datasets/meshes/faust.jl

* Update src/datasets/meshes/faust.jl

* Update src/datasets/meshes/faust.jl

* add branching

* Add proper spaces for julia-repl

* Fix test example

* remove backslash and join lines

Co-authored-by: Carlo Lucibello <carlo.lucibello@gmail.com>
diff --git a/Project.toml b/Project.toml
@@ -20,6 +20,7 @@ MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
 MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
 NPZ = "15e1cf62-19b3-5cfa-8e77-841668bca605"
 Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
+Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
diff --git a/docs/make.jl b/docs/make.jl
@@ -24,6 +24,7 @@ makedocs(
         "Home" => "index.md",
         "Datasets" => Any[
             "Graphs" => "datasets/graphs.md",
+            "Meshes" => "datasets/meshes.md",
             "Miscellaneous" => "datasets/misc.md",
             "Text" => "datasets/text.md",
             "Vision" => "datasets/vision.md",
diff --git a/docs/src/datasets/meshes.md b/docs/src/datasets/meshes.md
@@ -0,0 +1,15 @@
+# Mesh Datasets
+
+Mesh datasets contain data in the form of `Meshes.Mesh`. See [Meshes.jl](https://juliageometry.github.io/Meshes.jl/stable/) for a better understanding of meshes.
+
+## Index
+
+```@index
+Pages = ["meshes.md"]
+```
+
+## Documentation
+
+```@docs
+FAUST
+```
diff --git a/src/MLDatasets.jl b/src/MLDatasets.jl
@@ -6,6 +6,7 @@ using Tables
 using DataDeps
 import MLUtils
 using MLUtils: getobs, numobs, AbstractDataContainer
+using Printf
 using Glob
 using DelimitedFiles: readdlm
 using FileIO
@@ -124,6 +125,11 @@ export Reddit
 include("datasets/graphs/tudataset.jl")
 export TUDataset
 
+# Meshes
+
+include("datasets/meshes/faust.jl")
+export FAUST
+
 function __init__()
     # TODO automatically find and execute __init__xxx functions
 
@@ -154,6 +160,9 @@ function __init__()
     __init__mnist()
     __init__omniglot()
     __init__svhn2()
+
+    # mesh
+    __init__faust()
 end
 
 end #module
diff --git a/src/abstract_datasets.jl b/src/abstract_datasets.jl
@@ -19,22 +19,22 @@ end
 
 function Base.show(io::IO, ::MIME"text/plain", d::D) where D <: AbstractDataset
     recur_io = IOContext(io, :compact => false)
-    
+
     print(io, "dataset $(D.name.name):")  # if the type is parameterized don't print the parameters
-    
-    for f in fieldnames(D)
-        if !startswith(string(f), "_")
-            fstring = leftalign(string(f), 10)
-            print(recur_io, "\n  $fstring  =>    ")
-            # show(recur_io, MIME"text/plain"(), getfield(d, f))
-            # println(recur_io)
-            print(recur_io, "$(_summary(getfield(d, f)))")
-        end
+
+    fnames = filter(!startswith("_"), string.(fieldnames(D)))
+    f_length = max(length.(fnames)...)
+    for f in fnames
+        fstring = leftalign(f, f_length)
+        print(recur_io, "\n  $fstring  =>    ")
+        # show(recur_io, MIME"text/plain"(), getfield(d, f))
+        # println(recur_io)
+        print(recur_io, "$(_summary(getfield(d, Symbol(f))))")
     end
 end
 
 function leftalign(s::AbstractString, n::Int)
-    m = length(s) 
+    m = length(s)
     if m > n
         return s[1:n]
     else
@@ -60,7 +60,7 @@ a `features` and a `targets` fields.
 abstract type SupervisedDataset <: AbstractDataset end
 
 
-Base.length(d::SupervisedDataset) = Tables.istable(d.features) ? numobs_table(d.features) : 
+Base.length(d::SupervisedDataset) = Tables.istable(d.features) ? numobs_table(d.features) :
                                                                  numobs((d.features, d.targets))
 
 
@@ -69,7 +69,7 @@ Base.getindex(d::SupervisedDataset, ::Colon) = Tables.istable(d.features) ?
     (features = d.features, targets=d.targets) :
     getobs((; d.features, d.targets))
 
-Base.getindex(d::SupervisedDataset, i) = Tables.istable(d.features) ? 
+Base.getindex(d::SupervisedDataset, i) = Tables.istable(d.features) ?
     (features = getobs_table(d.features, i), targets=getobs_table(d.targets, i)) :
     getobs((; d.features, d.targets), i)
 
@@ -99,13 +99,13 @@ const ARGUMENTS_SUPERVISED_TABLE = """
 
 const FIELDS_SUPERVISED_TABLE = """
 - `metadata`: A dictionary containing additional information on the dataset.
-- `features`: The data features. An array if `as_df=true`, otherwise a dataframe. 
+- `features`: The data features. An array if `as_df=true`, otherwise a dataframe.
 - `targets`: The targets for supervised learning. An array if `as_df=true`, otherwise a dataframe.
 - `dataframe`: A dataframe containing both `features` and `targets`. It is `nothing` if `as_df=false`.
 """
 
 const METHODS_SUPERVISED_TABLE = """
-- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets. 
+- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
 - `dataset[:]`: Return all observations as a named tuple of features and targets.
 - `length(dataset)`: Number of observations.
 """
@@ -119,12 +119,12 @@ const ARGUMENTS_SUPERVISED_ARRAY = """
 
 const FIELDS_SUPERVISED_ARRAY = """
 - `metadata`: A dictionary containing additional information on the dataset.
-- `features`: An array storing the data features. 
+- `features`: An array storing the data features.
 - `targets`: An array storing the targets for supervised learning.
 """
 
 const METHODS_SUPERVISED_ARRAY = """
-- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets. 
+- `dataset[i]`: Return observation(s) `i` as a named tuple of features and targets.
 - `dataset[:]`: Return all observations as a named tuple of features and targets.
 - `length(dataset)`: Number of observations.
 """
diff --git a/src/datasets/meshes/faust.jl b/src/datasets/meshes/faust.jl
@@ -0,0 +1,213 @@
+function __init__faust()
+    DEPNAME = "MPI-FAUST"
+    DOCS = "http://faust.is.tue.mpg.de/"
+
+    register(ManualDataDep(
+        DEPNAME,
+        """
+        Dataset: $DEPNAME.
+        Website: $DOCS
+        """,
+    ))
+end
+
+"""
+    FAUST(split=:train; dir=nothing)
+
+The MPI FAUST dataset (2014).
+
+FAUST contains 300 real, high-resolution human scans of 10 different subjects in 30 different poses,
+with automatically computed ground-truth correspondences.
+
+Each scan is a high-resolution, triangulated, non-watertight mesh acquired with a 3D multi-stereo system.
+
+FAUST is subdivided into a training and a test set. The training set includes 100 scans (10 per subject)
+with their corresponding ground-truth alignments. The test set includes 200 scans. The FAUST benchmark defines
+100 preselected scan pairs, partitioned into two classes – 60 requiring intra-subject matching,
+40 requiring inter-subject matching.
+
+The dataset must be downloaded manually from the [website](http://faust.is.tue.mpg.de/)
+and extracted to the correct location. For information about where to place the dataset, refer to the Examples section.
+
+
+# Dataset Variables
+
+- `scans`: Vector of non-watertight scans in the form of `Mesh`.
+- `registrations`: Vector of registrations corresponding to each scan in `scans`. `registrations` like `scans` are also in the form of `Mesh`.
+- `labels`: For each scan in the training set, we provide the boolean Vector of length equal to the number of vertices in the corresponding scan. It represents which vertices were reliably registered by the corresponding registration.
+- `metadata`: A dictionary containing additional information on the dataset. Currently only `:test` split has metadata containing information about the registrations required for the inter and intra challenge proposed by the author.
+
+# Examples
+
+## Loading the dataset
+
+```julia-repl
+julia> using MLDatasets
+
+julia> dataset = FAUST()
+[ Info: This program requested access to the data dependency MPI-FAUST
+[ Info: It could not be found on your system. It requires manual installation.
+┌ Info: Please install it to one of the directories in the DataDeps load path: /home/user/.julia/packages/DataDeps/EDWdQ/deps/data/MPI-FAUST,
+│ /home/user/.julia/datadeps/MPI-FAUST,
+│ /home/user/.julia/juliaup/julia-1.7.3+0.x86/local/share/julia/datadeps/MPI-FAUST,
+│ /home/user/.julia/juliaup/julia-1.7.3+0.x86/share/julia/datadeps/MPI-FAUST,
+│ /home/user/datadeps/MPI-FAUST,
+│ /scratch/datadeps/MPI-FAUST,
+│ /staging/datadeps/MPI-FAUST,
+│ /usr/share/datadeps/MPI-FAUST,
+└ or /usr/local/share/datadeps/MPI-FAUST
+[ Info: by following the instructions:
+┌ Info: Dataset: MPI-FAUST.
+└ Website: http://faust.is.tue.mpg.de/
+Once installed please enter 'y' reattempt loading, or 'a' to abort
+[y/a]
+```
+Now download and extract the dataset into one of the given locations. For Unix-like systems, an example command is
+```bash
+unzip -q <path-to-file>/filename.zip -d ~/.julia/datadeps
+```
+The corresponding folder tree should look like
+```
+├── test
+│   ├── challenge_pairs
+│   └── scans
+└── training
+    ├── ground_truth_vertices
+    ├── registrations
+    └── scans
+```
+Press `y` to re-attempt loading.
+```julia-repl
+dataset FAUST:
+  scans          =>    100-element Vector{Any}
+  registrations  =>    100-element Vector{Any}
+  labels         =>    100-element Vector{Vector{Bool}}
+  metadata       =>    Dict{String, Any} with 0 entries
+```
+
+## Load train and test split
+
+```julia-repl
+julia> train_faust = FAUST(:train)
+dataset FAUST:
+  scans          =>    100-element Vector{Any}
+  registrations  =>    100-element Vector{Any}
+  labels         =>    100-element Vector{Vector{Bool}}
+  metadata       =>    Dict{String, Any} with 0 entries
+
+julia> test_faust = FAUST(:test)
+dataset FAUST:
+  scans          =>    200-element Vector{Any}
+  registrations  =>    0-element Vector{Any}
+  labels         =>    0-element Vector{Vector{Bool}}
+  metadata       =>    Dict{String, Any} with 2 entries
+```
+
+## Scan, registrations and ground-truth
+
+```julia-repl
+julia> dataset = FAUST(); # defaults to train split
+
+julia> scan = dataset.scans[1] # pick one scan
+Mesh{3, Float32, Triangle}:
+ Triangle(Float32[-0.0045452323, 0.08537669, 0.22134435], Float32[-0.0030340434, 0.08542955, 0.22206494],
+Float32[-0.0042151767, 0.08697654, 0.22171047])
+ Triangle(Float32[-0.05358432, 0.08490027, 0.17748278], Float32[-0.05379858, 0.083174236, 0.17670263],
+Float32[-0.052645437, 0.08346437, 0.17816517])
+.
+.
+.
+ Triangle(Float32[-0.07851, -1.0956081, 0.07093428], Float32[-0.06905176, -1.0986279, 0.07775441],
+Float32[-0.069199145, -1.0928112, 0.06812464])
+
+julia> registration = dataset.registrations[1] # The corresponding registration
+Mesh{3, Float32, Triangle}:
+ Triangle(Float32[0.12491254, 0.51199615, 0.29041073], Float32[0.11376736, 0.5156298, 0.3007352],
+Float32[0.119374536, 0.50043654, 0.29687837])
+ Triangle(Float32[0.119374536, 0.50043654, 0.29687837], Float32[0.11376736, 0.5156298, 0.3007352],
+Float32[0.10888693, 0.5008964, 0.30557302])
+.
+.
+.
+ Triangle(Float32[0.033744745, 0.030968456, 0.2359996], Float32[0.058017172, 0.044458304, 0.23422624],
+Float32[0.03615713, 0.04858183, 0.23596591])
+
+julia> label = dataset.labels[1] # The ground-truth labels for each vertex in the scan
+176387-element Vector{Bool}:
+ 1
+ 1
+ 1
+ .
+ .
+ .
+ 0
+ 0
+ 0
+```
+
+# References
+
+1. [MPI Faust Website](http://faust.is.tue.mpg.de/)
+
+2. Bogo, Federica & Romero, Javier & Loper, Matthew & Black, Michael. (2014). FAUST: Dataset
+and evaluation for 3D mesh registration. Proceedings of the IEEE Computer Society Conference
+on Computer Vision and Pattern Recognition. 10.1109/CVPR.2014.491.
+"""
+struct FAUST <: AbstractDataset
+    scans::Vector
+    registrations::Vector
+    labels::Vector{Vector{Bool}}
+    metadata::Dict{String, Any}
+end
+
+function FAUST(split=:train; dir=nothing)
+    isnothing(dir) && (dir = datadep"MPI-FAUST")
+
+    @assert split ∈ [:train, :test] "Only train and test splits are present in the dataset."
+
+    registrations = []
+    scans = []
+    labels = []
+    if split == :train
+        trainig_dir = joinpath(dir, "training")
+        reg_dir = joinpath(trainig_dir, "registrations")
+        scan_dir = joinpath(trainig_dir, "scans")
+        gt_dir = joinpath(trainig_dir, "ground_truth_vertices")
+        for i in range(0, 99)
+            reg_file = @sprintf("tr_reg_%03d.ply", i)
+            scan_file = @sprintf("tr_scan_%03d.ply", i)
+            gt_file = @sprintf("tr_gt_%03d.txt", i)
+            scan = load(joinpath(scan_dir, scan_file))
+            registration = load(joinpath(reg_dir, reg_file))
+            gt = open(joinpath(gt_dir, gt_file)) do file
+                s = readlines(file)
+                map(x-> x == "1", s)
+            end
+            push!(scans, scan)
+            push!(registrations, registration)
+            push!(labels, gt)
+        end
+        return FAUST(scans, registrations, labels, Dict())
+    else
+        scan_dir = joinpath(dir, "test", "scans")
+        for i in range(0, 199)
+            scan_file = @sprintf("test_scan_%03d.ply", i)
+            scan = load(joinpath(scan_dir, scan_file))
+            push!(scans, scan)
+        end
+        interfile = joinpath(dir, "test", "challenge_pairs", "inter_challenge.txt")
+        intrafile = joinpath(dir, "test", "challenge_pairs", "intra_challenge.txt")
+        inter_pairs = read_challenge_file(interfile)
+        intra_pairs = read_challenge_file(intrafile)
+        metadata = Dict("Inter_Pairs" => inter_pairs, "Intra_Pairs" => intra_pairs)
+        return FAUST(scans, registrations, labels, metadata)
+    end
+end
+
+function read_challenge_file(filename::String)::Vector{Tuple{Int, Int}}
+    pairs = open(filename) do file
+        s = readlines(file)
+        map(x -> Tuple(parse.(Int, (split(x, "_")))), s)
+    end
+    return pairs
+end
diff --git a/test/datasets/meshes.jl b/test/datasets/meshes.jl
@@ -0,0 +1,9 @@
+# requires manual downloading
+@testset "MPI-FAUST" begin
+    train_data  = FAUST()
+    test_data  = FAUST(:test)
+    @assert length(train_data.scans) == 100
+    @assert length(train_data.scans) == length(train_data.registrations)
+    @assert length(train_data.scans) == length(train_data.labels)
+    @assert length(test_data.scans) == 200
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -28,6 +28,7 @@ no_ci_dataset_tests = [
     "datasets/vision/emnist.jl",
     "datasets/vision/omniglot.jl",
     "datasets/vision/svhn2.jl",
+    "datasets/meshes.jl"
     ]
 
 @assert isempty(intersect(dataset_tests, no_ci_dataset_tests))

Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ no_ci_dataset_tests = [`
`28`	`28`	`"datasets/vision/emnist.jl",`
`29`	`29`	`"datasets/vision/omniglot.jl",`
`30`	`30`	`"datasets/vision/svhn2.jl",`
	`31`	`+ "datasets/meshes.jl"`
`31`	`32`	`]`
`32`	`33`
`33`	`34`	`@assert isempty(intersect(dataset_tests, no_ci_dataset_tests))`