Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions .github/workflows/check_registry.yml

This file was deleted.

3 changes: 3 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ jobs:
${{ runner.os }}-
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
env:
# This environment variable enables the integration tests:
MLJ_TEST_INTEGRATION: '1'
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v4
with:
Expand Down
22 changes: 13 additions & 9 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,33 +31,37 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
CategoricalArrays = "0.9, 0.10"
CategoricalDistributions = "0.1"
Combinatorics = "1.0"
Dates = "<0.0.1, 1"
Dates = "1"
Distances = "0.9,0.10"
Distributed = "1"
Distributions = "0.25"
InteractiveUtils = "<0.0.1, 1"
LinearAlgebra = "<0.0.1, 1"
Markdown = "<0.0.1, 1"
InteractiveUtils = "1"
LinearAlgebra = "1"
MLJModelInterface = "1.10"
Markdown = "1"
OrderedCollections = "1.1"
Parameters = "0.12"
Pkg = "<0.0.1, 1"
Pkg = "1"
PrettyPrinting = "0.3, 0.4"
Random = "<0.0.1, 1"
Random = "1"
RelocatableFolders = "0.3, 1"
ScientificTypes = "3"
StatisticalTraits = "3"
Statistics = "<0.0.1, 1"
Statistics = "1"
StatsBase = "0.32,0.33, 0.34"
Suppressor = "0.2.8"
Tables = "0.2,1.0"
julia = "1.6"
julia = "1.10"

[extras]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
MLJMultivariateStatsInterface = "1b6a4a23-ba22-4f51-9698-8599985d3728"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Test"]
test = ["Distributed", "MLJBase", "MLJDecisionTreeInterface", "MLJMultivariateStatsInterface", "Pkg", "StableRNGs", "Suppressor", "Test"]
89 changes: 0 additions & 89 deletions src/GaussianProcesses.jl

This file was deleted.

24 changes: 12 additions & 12 deletions src/MLJModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ export UnivariateDiscretizer,
OneHotEncoder, ContinuousEncoder, FillImputer, UnivariateFillImputer,
UnivariateTimeTypeToContinuous, InteractionTransformer

const srcdir = dirname(@__FILE__) # the directory containing this file
const MMI = MLJModelInterface

if VERSION < v"1.3"
Expand All @@ -64,28 +63,29 @@ include("builtins/Constant.jl")
include("builtins/Transformers.jl")
include("builtins/ThresholdPredictors.jl")

Handle = NamedTuple{(:name, :pkg), Tuple{String,String}}
(::Type{Handle})(name,string) = NamedTuple{(:name, :pkg)}((name, string))
# declare paths to the metadata and associated project file:
const REGISTRY_PROJECT = @path joinpath(@__DIR__, "registry", "Project.toml")
const REGISTRY_METADATA = @path joinpath(@__DIR__, "registry", "Metadata.toml")
Base.include_dependency(REGISTRY_PROJECT)
Base.include_dependency(REGISTRY_METADATA)

# load utilities for reading model metadata from file:
include("metadata.jl")

# read in the metadata:
metadata_file = joinpath(srcdir, "registry", "Metadata.toml")
Base.include_dependency(metadata_file)
const INFO_GIVEN_HANDLE = info_given_handle(metadata_file)
# read in metadata:
const INFO_GIVEN_HANDLE = info_given_handle(REGISTRY_METADATA)
const PKGS_GIVEN_NAME = pkgs_given_name(INFO_GIVEN_HANDLE)
const AMBIGUOUS_NAMES = ambiguous_names(INFO_GIVEN_HANDLE)
const NAMES = model_names(INFO_GIVEN_HANDLE)
const MODEL_TRAITS_IN_REGISTRY = model_traits_in_registry(INFO_GIVEN_HANDLE)

# model search and registry code:
# include tools to search the model registry:
include("model_search.jl")

# include tools to load model code:
include("loading.jl")
include("registry/src/Registry.jl")
using .Registry

# finalize:
include("init.jl")
# include tool for cloning the Model Registry project file:
include("registry_project.jl")

end # module
7 changes: 0 additions & 7 deletions src/init.jl

This file was deleted.

92 changes: 25 additions & 67 deletions src/metadata.jl
Original file line number Diff line number Diff line change
@@ -1,25 +1,5 @@
## UTILITIES FOR ENCODING AND DECODING MODEL METADATA
# (for serializing/deserializing into TOML format)

# fallback encoding:
"""
    encode_dic(s)

Fallback encoding: return the string representation of `s` wrapped in
backticks, with every `#` replaced by `_`. The replacement is a hack for
objects with gensyms in their string representation.
"""
encode_dic(s) = replace(string('`', s, '`'), '#' => '_')

# Strings are encoded as themselves (converted with `string`).
function encode_dic(s::AbstractString)
    return string(s)
end

# Symbols are encoded with a leading colon, e.g. `:a` becomes ":a".
function encode_dic(s::Symbol)
    return string(':', s)
end

# `nothing` is encoded as the backtick-quoted word "nothing".
function encode_dic(::Nothing)
    return "`nothing`"
end

# Vectors are encoded element-wise.
function encode_dic(v::AbstractVector)
    return broadcast(encode_dic, v)
end
# Dictionaries are encoded recursively: every key and every value is passed
# through `encode_dic`, and the results are collected in a `LittleDict`.
function encode_dic(d::AbstractDict)
    encoded = LittleDict{}()
    for entry in d
        encoded[encode_dic(first(entry))] = encode_dic(last(entry))
    end
    return encoded
end
# # DECODING MODEL METADATA
# (deserializing TOML dictionary)

function decode_dic(s::String)
if !isempty(s)
Expand Down Expand Up @@ -51,34 +31,11 @@ function decode_dic(d::AbstractDict)
return ret
end

# the inverse of a multivalued dictionary is a multivalued
# dictionary:
"""
    inverse(d::LittleDict{S,Set{T}})

Return the inverse of the multivalued dictionary `d`: a
`LittleDict{T,Set{S}}` mapping each value occurring in some set of `d` to
the set of all keys of `d` whose set contains that value.
"""
function inverse(d::LittleDict{S,Set{T}}) where {S,T}
    dinv = LittleDict{T,Set{S}}()
    for (key, vals) in d, val in vals
        if !(val in keys(dinv))
            # first time this value is seen: start a fresh key set
            dinv[val] = Set{S}((key,))
        else
            push!(dinv[val], key)
        end
    end
    return dinv
end
"""
    inverse(d::Dict{S,Set{T}})

Return the inverse of the multivalued dictionary `d`: a `Dict{T,Set{S}}`
mapping each value occurring in some set of `d` to the set of all keys of
`d` whose set contains that value.
"""
function inverse(d::Dict{S,Set{T}}) where {S,T}
    dinv = Dict{T,Set{S}}()
    for (key, vals) in d, val in vals
        # fetch the key set for `val`, creating an empty one on first sight
        keyset = get!(dinv, val) do
            Set{S}()
        end
        push!(keyset, key)
    end
    return dinv
end

# # MODEL HANDLES

# A model handle is a named tuple with fields `name` and `pkg`. The alias is
# declared `const` so that references to `Handle` inside functions are not
# accesses to an untyped global.
const Handle = NamedTuple{(:name, :pkg), Tuple{String,String}}

# Positional constructor `Handle(name, pkg)`. The tuple is built through the
# field-name-only type `NamedTuple{(:name, :pkg)}`, so the field types follow
# the arguments actually supplied (callers may, for example, pass `missing`
# as `pkg`).
(::Type{Handle})(name, pkg) = NamedTuple{(:name, :pkg)}((name, pkg))

function Base.isless(h1::Handle, h2::Handle)
if isless(h1.name, h2.name)
Expand All @@ -90,30 +47,39 @@ function Base.isless(h1::Handle, h2::Handle)
end
end

# Construct a handle from a model name alone. If `name` is listed in
# `AMBIGUOUS_NAMES` the package is left as `missing`; otherwise it is the
# first entry of `PKGS_GIVEN_NAME[name]`.
function (::Type{Handle})(name::String)
    pkg = name in AMBIGUOUS_NAMES ? missing : first(PKGS_GIVEN_NAME[name])
    return Handle(name, pkg)
end


## FUNCTIONS TO BUILD GLOBAL METADATA CONSTANTS IN MLJMODELS
## INITIALIZATION
# # FUNCTIONS TO BUILD GLOBAL METADATA CONSTANTS

# to define INFO_GIVEN_HANDLE
function info_given_handle(metadata_file)
metadata = LittleDict(TOML.parsefile(metadata_file))
metadata_given_pkg = decode_dic(metadata)
metadata_given_api_pkg = decode_dic(metadata)

# build info_given_handle dictionary:
ret = Dict{Handle}{Any}()
packages = keys(metadata_given_pkg)
for pkg in packages
info_given_name = metadata_given_pkg[pkg]
packages = keys(metadata_given_api_pkg)
for api_pkg in packages
info_given_name = metadata_given_api_pkg[api_pkg]
for name in keys(info_given_name)
info = info_given_name[name]
pkg = info[:package_name]
handle = Handle(name, pkg)
ret[handle] = info_given_name[name]
ret[handle] = info
end
end
return ret

end

# for use in __init__ to define AMBIGUOUS_NAMES
# to define AMBIGUOUS_NAMES
function ambiguous_names(info_given_handle)
names_with_duplicates = map(keys(info_given_handle) |> collect) do handle
handle.name
Expand All @@ -124,7 +90,7 @@ function ambiguous_names(info_given_handle)
end
end

# for use in __init__ to define PKGS_GIVEN_NAME
# to define PKGS_GIVEN_NAME
function pkgs_given_name(info_given_handle)
handles = keys(info_given_handle) |> collect
ret = Dict{String,Vector{String}}()
Expand All @@ -145,14 +111,6 @@ function model_names(info_given_handle)
return unique(names_allowing_duplicates)
end

# Construct a handle from a model name alone. If `name` is listed in
# `AMBIGUOUS_NAMES` the package is left as `missing`; otherwise it is the
# first entry of `PKGS_GIVEN_NAME[name]`.
function (::Type{Handle})(name::String)
    pkg = name in AMBIGUOUS_NAMES ? missing : first(PKGS_GIVEN_NAME[name])
    return Handle(name, pkg)
end

function model_traits_in_registry(info_given_handle)
first_entry = info_given_handle[Handle("ConstantRegressor")]
return keys(first_entry) |> collect
Expand Down
Loading
Loading