Performance improvements: type stability and reduced allocations

claude · claude · commit 07ac4cdd1e97 · 2026-01-07T19:28:40.000-05:00
Optimizations made:

- `extract_coefficients` (util.jl): Fixed a type instability caused by `findfirst` returning `Union{Nothing, Int}`; the indices are now collected in an explicit loop with an `Int` type assertion. Array lengths are also computed once, outside the loops, for better performance.
- `monomial_compress` (wronskian.jl): Replaced `Array{Any, 1}` with a typed `Vector{Tuple{P, T}}` for type stability. Parameter names are pre-computed into a `Set` for O(1) lookup instead of being rebuilt with `map` for every generator.
- `massive_eval` (wronskian.jl): Uses typed containers (`Set{Vector{Int}}`, `Dict{Vector{Int}, T}`) instead of untyped ones. Pre-allocates working arrays, updates them in place with `@inbounds`, and pre-sizes the result array.
- `det_minor_expansion_inner` (elimination.jl): Replaced `discarded in keys(cache)` with `haskey(cache, discarded)` for better performance. Builds `Set`s of the discarded rows/columns once per call, and uses `sort!` on the freshly built array instead of allocating a sorted copy.

These changes improve type stability and reduce unnecessary allocations in hot code paths, particularly benefiting larger ODE systems.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/src/elimination.jl b/src/elimination.jl
@@ -11,21 +11,30 @@ function det_minor_expansion_inner(
     if length(discarded[1]) == n
         return 1
     end
-    if discarded in keys(cache)
+    if haskey(cache, discarded)
         return cache[discarded]
     end
     result = 0
-    row = minimum(setdiff(Set(1:n), Set(discarded[1])))
-    dis_rows = Tuple(sort([[i for i in discarded[1]]; row]))
+    # Build a Set of the discarded rows once for O(1) membership tests
+    discarded_rows_set = Set(discarded[1])
+    row = 0
+    @inbounds for i in 1:n
+        if !(i in discarded_rows_set)
+            row = i
+            break
+        end
+    end
+    dis_rows = Tuple(sort!([discarded[1]..., row]))
     sign = 1
+    discarded_cols_set = Set(discarded[2])
     for col in 1:n
-        if !(col in discarded[2])
-            dis_cols = Tuple(sort([[i for i in discarded[2]]; col]))
+        if !(col in discarded_cols_set)
+            dis_cols = Tuple(sort!([discarded[2]..., col]))
             result +=
                 sign *
                 m[row, col] *
                 det_minor_expansion_inner(m, (dis_rows, dis_cols), cache)
-            sign = -1 * sign
+            sign = -sign
         end
     end
     if length(discarded[1]) > 1
diff --git a/src/util.jl b/src/util.jl
@@ -192,7 +192,12 @@ Output:
 function extract_coefficients(poly::P, variables::Array{P, 1}) where {P <: MPolyRingElem}
     xs = gens(parent(poly))
     @assert all(in(xs), variables)
-    cut_indices = map(v -> findfirst(x -> x == v, xs), variables)
+    # Keep cut_indices a Vector{Int}: findfirst returns Union{Nothing, Int}
+    cut_indices = Vector{Int}(undef, length(variables))
+    for (j, v) in enumerate(variables)
+        idx = findfirst(x -> x == v, xs)
+        cut_indices[j] = idx::Int  # Assert non-nothing for type stability
+    end
     coeff_indices = setdiff(collect(1:length(xs)), cut_indices)
     coeff_vars = xs[coeff_indices]
 
@@ -202,20 +207,25 @@ function extract_coefficients(poly::P, variables::Array{P, 1}) where {P <: MPoly
 
     result = Dict{Vector{Int}, Tuple{Vector{Vector{Int}}, Vector{FieldType}}}()
 
+    n_cut = length(cut_indices)
+    n_coeff = length(coeff_indices)
     @inbounds for i in 1:length(poly)
         coef = coeff(poly, i)
         evec = exponent_vector(poly, i)
-        var_slice = [evec[i] for i in cut_indices]
+        var_slice = Vector{Int}(undef, n_cut)
+        for j in 1:n_cut
+            var_slice[j] = evec[cut_indices[j]]
+        end
         if !haskey(result, var_slice)
             monom_vect, coef_vect = Vector{Vector{Int}}(), Vector{FieldType}()
             sizehint!(monom_vect, 8)
             sizehint!(coef_vect, 8)
             result[var_slice] = (monom_vect, coef_vect)
         end
         monom_vect, coef_vect = result[var_slice]
-        new_monom = Vector{Int}(undef, length(coeff_vars))
-        for i in 1:length(new_monom)
-            new_monom[i] = evec[coeff_indices[i]]
+        new_monom = Vector{Int}(undef, n_coeff)
+        for j in 1:n_coeff
+            new_monom[j] = evec[coeff_indices[j]]
         end
         push!(monom_vect, new_monom)
         push!(coef_vect, coef)
diff --git a/src/wronskian.jl b/src/wronskian.jl
@@ -22,38 +22,38 @@ end
 
 function monomial_compress(io_equation, params::Array{<:MPolyRingElem, 1})
     params_xs = isempty(params) ? empty(params) : gens(parent(first(params)))
+    # Pre-compute param string names for faster lookup
+    param_names = Set(var_to_str(p, xs = params_xs) for p in params)
     other_vars = [
-        v for v in gens(parent(io_equation)) if
-            !(var_to_str(v) in map(p -> var_to_str(p, xs = params_xs), params))
+        v for v in gens(parent(io_equation)) if !(var_to_str(v) in param_names)
     ]
     coeffdict = extract_coefficients(io_equation, other_vars)
     expvect = collect(keys(coeffdict))
     coeffs = collect(values(coeffdict))
     termlist = map(x -> prod(other_vars .^ x), expvect)
 
-    echelon_form = Array{Any, 1}()
+    # Use typed arrays instead of Array{Any, 1}
+    P = eltype(coeffs)
+    T = eltype(termlist)
+    echelon_form = Vector{Tuple{P, T}}()
+    sizehint!(echelon_form, length(coeffs))
     for (c, p) in zip(coeffs, termlist)
         for i in 1:length(echelon_form)
             basis_c = echelon_form[i][1]
-            coef = coeff(c, leading_monomial(basis_c)) // leading_coefficient(basis_c)
+            lm = leading_monomial(basis_c)
+            coef = coeff(c, lm) // leading_coefficient(basis_c)
             if coef != 0
                 c = c - coef * basis_c
-                echelon_form[i][2] += coef * p
+                # Tuples are immutable: replace the entry with an updated tuple
+                echelon_form[i] = (echelon_form[i][1], echelon_form[i][2] + coef * p)
             end
         end
         if c != 0
-            push!(echelon_form, [c, p])
+            push!(echelon_form, (c, p))
         end
     end
 
     result = ([a[1] for a in echelon_form], [a[2] for a in echelon_form])
-    #s = 0
-    #for (a, b) in zip(result[1], result[2])
-    #    s += parent_ring_change(a, parent(io_equation)) * parent_ring_change(b, parent(io_equation))
-    #end
-    #println("====================")
-    #println(s - io_equation)
-
     return result
 end
 
@@ -136,49 +136,65 @@ of lower degree are cached and used to compute the values of the monomials of hi
 """
 function massive_eval(polys, eval_dict)
     R = parent(first(values(eval_dict)))
-    point = [get(eval_dict, v, zero(R)) for v in gens(parent(first(polys)))]
+    poly_ring = parent(first(polys))
+    poly_gens = gens(poly_ring)
+    point = [get(eval_dict, v, zero(R)) for v in poly_gens]
     n = length(point)
 
-    monomials = Set()
+    # Use typed Set for better performance
+    monomials = Set{Vector{Int}}()
     for p in polys
         for exp in exponent_vectors(p)
             push!(monomials, exp)
         end
     end
 
-    cache = Dict()
-    cache[[0 for i in 1:n]] = one(R)
+    # Pre-allocate the zero vector once
+    zero_vec = zeros(Int, n)
+    cache = Dict{Vector{Int}, typeof(one(R))}()
+    cache[zero_vec] = one(R)
     cached_monoms = ExpVectTrie(n)
-    push!(cached_monoms, [0 for _ in 1:n])
+    push!(cached_monoms, zero_vec)
 
+    # Cache unit vectors
     for i in 1:n
-        var_exp = [(i != j) ? 0 : 1 for j in 1:n]
+        var_exp = zeros(Int, n)
+        var_exp[i] = 1
         cache[var_exp] = point[i]
         push!(cached_monoms, var_exp)
     end
 
+    # Pre-allocate working arrays
+    computed = zeros(Int, n)
+    exp_work = zeros(Int, n)
     for exp in sort!(collect(monomials), by = sum)
-        if !(exp in keys(cache))
+        if !haskey(cache, exp)
             monom_val = one(R)
-            computed = [0 for i in 1:n]
-            while sum(exp) > 0
-                _, below = get_max_below(cached_monoms, exp)
+            # Use in-place operations on working arrays
+            fill!(computed, 0)
+            copyto!(exp_work, exp)
+            while sum(exp_work) > 0
+                _, below = get_max_below(cached_monoms, exp_work)
                 monom_val = monom_val * cache[below]
-                exp = exp .- below
-                computed = computed .+ below
-                cache[computed] = monom_val
-                push!(cached_monoms, computed)
+                @inbounds for k in 1:n
+                    exp_work[k] -= below[k]
+                    computed[k] += below[k]
+                end
+                computed_copy = copy(computed)
+                cache[computed_copy] = monom_val
+                push!(cached_monoms, computed_copy)
             end
         end
     end
 
-    results = []
-    for p in polys
+    # Pre-size results array with correct type
+    results = Vector{typeof(zero(R))}(undef, length(polys))
+    for (pidx, p) in enumerate(polys)
         res = zero(R)
         for (exp, coef) in zip(exponent_vectors(p), coefficients(p))
             res += coef * cache[exp]
         end
-        push!(results, res)
+        results[pidx] = res
     end
     return results
 end