forked from MilesCranmer/DataDrivenDiffEq.jl
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcommonsolve.jl
More file actions
101 lines (76 loc) · 3.14 KB
/
commonsolve.jl
File metadata and controls
101 lines (76 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
function CommonSolve.solve!(ps::InternalDataDrivenProblem{
        <:AbstractSparseRegressionAlgorithm
})
    @unpack alg, basis, testdata, traindata, problem, options, transform = ps
    # Fit one sparse regression per training fold
    fits = [__sparse_regression(ps, X, Y) for (X, Y) in traindata]
    # Rank all fits from best to worst by their l2 error
    sort!(fits, by = l2error)
    winner = first(fits)
    # Undo the data normalization on the winning coefficients before
    # constructing the resulting basis
    ξ = permutedims(copy(get_coefficients(winner)))
    ξ = permutedims(StatsBase.transform(transform, ξ))
    new_basis = DataDrivenDiffEq.__construct_basis(ξ, basis, problem, options)
    return DataDrivenSolution(new_basis, problem, alg, fits, ps, winner.retcode)
end
function __sparse_regression(
        ps::InternalDataDrivenProblem{
            <:AbstractSparseRegressionAlgorithm
        },
        X::AbstractArray,
        Y::AbstractArray)
    # Run the sparse regression algorithm on a single (X, Y) training pair and
    # wrap the outcome in a `SparseRegressionResult`.
    # NOTE(review): dropped unused `transform` from the unpack — it was never
    # referenced in this method (de-normalization happens in `solve!`).
    @unpack alg, testdata, options = ps
    coefficients, optimal_thresholds, optimal_iterations = alg(X, Y, options = options)
    # Squared residual on the training data
    trainerror = sum(abs2, Y .- coefficients * X)
    # Evaluate on the held-out data when present; `nothing` signals "no test set"
    X̃, Ỹ = testdata
    testerror = isempty(X̃) ? nothing : sum(abs2, Ỹ .- coefficients * X̃)
    retcode = DDReturnCode(1)
    # Degrees of freedom = number of active (nonzero-magnitude) coefficients.
    # `count` with a predicate avoids the two temporary arrays allocated by
    # `sum(abs.(coefficients) .> 0.0)` while giving the identical result.
    dof = count(c -> abs(c) > 0.0, coefficients)
    return SparseRegressionResult(coefficients, dof, optimal_thresholds,
        optimal_iterations, testerror, trainerror,
        retcode)
end
function __sparse_regression(ps::InternalDataDrivenProblem{<:ImplicitOptimizer},
        X::AbstractArray, Y::AbstractArray)
    # Implicit sparse regression: solve one regression per implicit variable,
    # restricting each to the basis elements admissible for that variable.
    # NOTE(review): dropped unused `transform` from the unpack.
    @unpack alg, testdata, options, basis, problem, implicit_idx = ps
    # Validate at the API boundary with an explicit throw: `@assert` may be
    # disabled at higher optimization levels and must not guard user input.
    DataDrivenDiffEq.is_implicit(basis) ||
        throw(ArgumentError("The provided `Basis` does not have implicit variables!"))
    # candidate_matrix[i, j] is true when basis element i may appear in the
    # equation for implicit variable j: either the element depends on variable j,
    # or it depends on no other implicit variable.
    candidate_matrix = zeros(Bool, size(implicit_idx))
    others = ones(Bool, size(candidate_matrix, 2))
    for i in axes(candidate_matrix, 1), j in axes(candidate_matrix, 2)
        others .= true
        others[j] = false
        # We want only equations which are either dependent on the variable or on no other
        candidate_matrix[i, j] = implicit_idx[i, j] || sum(implicit_idx[i, others]) == 0
    end
    opt_coefficients = zeros(eltype(problem), size(candidate_matrix, 2),
        size(candidate_matrix, 1))
    # Per-variable algorithm diagnostics, collected in column order
    opt_thresholds = []
    opt_iterations = []
    # Renamed closure argument (was `idx`) so it no longer shadows the Boolean
    # buffer mutated in the loop above.
    foreach(enumerate(eachcol(candidate_matrix))) do (i, candidates)
        # We enforce that one of the implicit variables is necessary for success
        coeff, thresholds,
        iters = alg(X[candidates, :], Y, options = options,
            necessary_idx = implicit_idx[candidates, i])
        opt_coefficients[i:i, candidates] .= coeff
        push!(opt_thresholds, thresholds)
        push!(opt_iterations, iters)
    end
    # For the implicit problem the training error is the residual norm of the
    # homogeneous system itself (no explicit target is subtracted here).
    trainerror = sum(abs2, opt_coefficients * X)
    X̃, Ỹ = testdata
    testerror = isempty(X̃) ? nothing : sum(abs2, opt_coefficients * X̃)
    retcode = DDReturnCode(1)
    # Active (nonzero-magnitude) coefficients, counted without temporaries
    dof = count(c -> abs(c) > 0.0, opt_coefficients)
    return SparseRegressionResult(opt_coefficients, dof, opt_thresholds,
        opt_iterations, testerror, trainerror,
        retcode)
end