diff --git a/docs/src/solvers/solvers.md b/docs/src/solvers/solvers.md index 1290a8fa2..71a12edc8 100644 --- a/docs/src/solvers/solvers.md +++ b/docs/src/solvers/solvers.md @@ -32,6 +32,24 @@ this is only recommended for Float32 matrices. Choose `CudaOffloadLUFactorizatio performance on well-conditioned problems, or `CudaOffloadQRFactorization` for better numerical stability on ill-conditioned problems. +#### Mixed Precision Methods + +For large well-conditioned problems where memory bandwidth is the bottleneck, mixed precision +methods can provide significant speedups (up to 2x) by performing the factorization in Float32 +while maintaining Float64 interfaces. These methods are particularly effective for: +- Large dense matrices (> 1000x1000) +- Well-conditioned problems (condition number < 10^4) +- Hardware with good Float32 performance + +Available mixed precision solvers: +- `MKL32MixedLUFactorization` - CPUs with MKL +- `AppleAccelerate32MixedLUFactorization` - Apple CPUs with Accelerate +- `CUDAOffload32MixedLUFactorization` - NVIDIA GPUs with CUDA +- `MetalOffload32MixedLUFactorization` - Apple GPUs with Metal + +These methods automatically handle the precision conversion, making them easy drop-in replacements +when reduced precision is acceptable for the factorization step. + !!! note Performance details for dense LU-factorizations can be highly dependent on the hardware configuration. @@ -205,6 +223,7 @@ KrylovJL ```@docs MKLLUFactorization +MKL32MixedLUFactorization ``` ### AppleAccelerate.jl @@ -215,6 +234,7 @@ MKLLUFactorization ```@docs AppleAccelerateLUFactorization +AppleAccelerate32MixedLUFactorization ``` ### Metal.jl @@ -225,6 +245,7 @@ AppleAccelerateLUFactorization ```@docs MetalLUFactorization +MetalOffload32MixedLUFactorization ``` ### Pardiso.jl @@ -251,6 +272,7 @@ The following are non-standard GPU factorization routines. ```@docs CudaOffloadLUFactorization CudaOffloadQRFactorization +CUDAOffload32MixedLUFactorization ``` ### AMDGPU.jl diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl index eab6ee401..503ee71fb 100644 --- a/ext/LinearSolveCUDAExt.jl +++ b/ext/LinearSolveCUDAExt.jl @@ -7,6 +7,7 @@ using LinearSolve: LinearSolve, is_cusparse, defaultalg, cudss_loaded, DefaultLi needs_concrete_A, error_no_cudss_lu, init_cacheval, OperatorAssumptions, CudaOffloadFactorization, CudaOffloadLUFactorization, CudaOffloadQRFactorization, + CUDAOffload32MixedLUFactorization, SparspakFactorization, KLUFactorization, UMFPACKFactorization, LinearVerbosity using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface @@ -118,4 +119,40 @@ function LinearSolve.init_cacheval( nothing end +# Mixed precision CUDA LU implementation +function SciMLBase.solve!(cache::LinearSolve.LinearCache, alg::CUDAOffload32MixedLUFactorization; + kwargs...) 
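+    # Mixed-precision path: factor A in Float32 on the GPU (reused while the cache
+    # is not fresh), solve in Float32, then cast the result back to eltype(cache.u).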
+ if cache.isfresh + cacheval = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) + # Convert to Float32 for factorization + A_f32 = Float32.(cache.A) + fact = lu(CUDA.CuArray(A_f32)) + cache.cacheval = fact + cache.isfresh = false + end + fact = LinearSolve.@get_cacheval(cache, :CUDAOffload32MixedLUFactorization) + # Convert b to Float32, solve, then convert back to original precision + b_f32 = Float32.(cache.b) + u_f32 = CUDA.CuArray(b_f32) + y_f32 = ldiv!(u_f32, fact, CUDA.CuArray(b_f32)) + # Convert back to original precision + y = Array(y_f32) + T = eltype(cache.u) + cache.u .= T.(y) + SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) +end + +function LinearSolve.init_cacheval(alg::CUDAOffload32MixedLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::LinearVerbosity, + assumptions::OperatorAssumptions) + # Pre-allocate with Float32 arrays + A_f32 = Float32.(A) + T = eltype(A_f32) + noUnitT = typeof(zero(T)) + luT = LinearAlgebra.lutype(noUnitT) + ipiv = CuVector{Int32}(undef, 0) + info = zero(LinearAlgebra.BlasInt) + return LU{luT}(CuMatrix{Float32}(undef, 0, 0), ipiv, info) +end + end diff --git a/ext/LinearSolveMetalExt.jl b/ext/LinearSolveMetalExt.jl index 507ae0142..0207b1425 100644 --- a/ext/LinearSolveMetalExt.jl +++ b/ext/LinearSolveMetalExt.jl @@ -3,7 +3,8 @@ module LinearSolveMetalExt using Metal, LinearSolve using LinearAlgebra, SciMLBase using SciMLBase: AbstractSciMLOperator -using LinearSolve: ArrayInterface, MKLLUFactorization, @get_cacheval, LinearCache, SciMLBase +using LinearSolve: ArrayInterface, MKLLUFactorization, MetalOffload32MixedLUFactorization, + @get_cacheval, LinearCache, SciMLBase, OperatorAssumptions, LinearVerbosity default_alias_A(::MetalLUFactorization, ::Any, ::Any) = false default_alias_b(::MetalLUFactorization, ::Any, ::Any) = false @@ -28,4 +29,45 @@ function SciMLBase.solve!(cache::LinearCache, alg::MetalLUFactorization; SciMLBase.build_linear_solution(alg, y, nothing, cache) end +# Mixed precision Metal LU implementation +default_alias_A(::MetalOffload32MixedLUFactorization, ::Any, ::Any) = false +default_alias_b(::MetalOffload32MixedLUFactorization, ::Any, ::Any) = false + +function LinearSolve.init_cacheval(alg::MetalOffload32MixedLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::LinearVerbosity, + assumptions::OperatorAssumptions) + # Pre-allocate with Float32 arrays + A_f32 = Float32.(convert(AbstractMatrix, A)) + ArrayInterface.lu_instance(A_f32) +end + +function SciMLBase.solve!(cache::LinearCache, alg::MetalOffload32MixedLUFactorization; + kwargs...) 
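+    # Mixed-precision path: factor in Float32 on the Metal GPU, keep the factors on
+    # the CPU, solve in Float32, and convert the solution back to eltype(cache.u).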
+ A = cache.A + A = convert(AbstractMatrix, A) + if cache.isfresh + cacheval = @get_cacheval(cache, :MetalOffload32MixedLUFactorization) + # Convert to Float32 for factorization + A_f32 = Float32.(A) + res = lu(MtlArray(A_f32)) + # Store factorization on CPU with converted types + cache.cacheval = LU(Array(res.factors), Array{Int}(res.ipiv), res.info) + cache.isfresh = false + end + + fact = @get_cacheval(cache, :MetalOffload32MixedLUFactorization) + # Convert b to Float32 for solving + b_f32 = Float32.(cache.b) + u_f32 = similar(b_f32) + + # Create a temporary Float32 LU factorization for solving + fact_f32 = LU(Float32.(fact.factors), fact.ipiv, fact.info) + ldiv!(u_f32, fact_f32, b_f32) + + # Convert back to original precision + T = eltype(cache.u) + cache.u .= T.(u_f32) + SciMLBase.build_linear_solution(alg, cache.u, nothing, cache) +end + end diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl index e8bcc5b3e..eca4f0692 100644 --- a/src/LinearSolve.jl +++ b/src/LinearSolve.jl @@ -456,13 +456,17 @@ export HYPREAlgorithm export CudaOffloadFactorization export CudaOffloadLUFactorization export CudaOffloadQRFactorization +export CUDAOffload32MixedLUFactorization export AMDGPUOffloadLUFactorization, AMDGPUOffloadQRFactorization export MKLPardisoFactorize, MKLPardisoIterate export PanuaPardisoFactorize, PanuaPardisoIterate export PardisoJL export MKLLUFactorization +export MKL32MixedLUFactorization export AppleAccelerateLUFactorization +export AppleAccelerate32MixedLUFactorization export MetalLUFactorization +export MetalOffload32MixedLUFactorization export OperatorAssumptions, OperatorCondition diff --git a/src/appleaccelerate.jl b/src/appleaccelerate.jl index 3d0dcbf55..b3d454f5d 100644 --- a/src/appleaccelerate.jl +++ b/src/appleaccelerate.jl @@ -14,6 +14,7 @@ to avoid allocations and does not require libblastrampoline. """ struct AppleAccelerateLUFactorization <: AbstractFactorization end + @static if !Sys.isapple() __appleaccelerate_isavailable() = false else @@ -284,3 +285,84 @@ function SciMLBase.solve!(cache::LinearCache, alg::AppleAccelerateLUFactorizatio SciMLBase.build_linear_solution( alg, cache.u, nothing, cache; retcode = ReturnCode.Success) end + +# Mixed precision AppleAccelerate implementation +default_alias_A(::AppleAccelerate32MixedLUFactorization, ::Any, ::Any) = false +default_alias_b(::AppleAccelerate32MixedLUFactorization, ::Any, ::Any) = false + +const PREALLOCATED_APPLE32_LU = begin + A = rand(Float32, 0, 0) + luinst = ArrayInterface.lu_instance(A) + LU(luinst.factors, similar(A, Cint, 0), luinst.info), Ref{Cint}() +end + +function LinearSolve.init_cacheval(alg::AppleAccelerate32MixedLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::LinearVerbosity, + assumptions::OperatorAssumptions) + # Pre-allocate appropriate 32-bit arrays based on input type + if eltype(A) <: Complex + A_32 = rand(ComplexF32, 0, 0) + else + A_32 = rand(Float32, 0, 0) + end + luinst = ArrayInterface.lu_instance(A_32) + LU(luinst.factors, similar(A_32, Cint, 0), luinst.info), Ref{Cint}() +end + +function SciMLBase.solve!(cache::LinearCache, alg::AppleAccelerate32MixedLUFactorization; + kwargs...) + __appleaccelerate_isavailable() || + error("Error, AppleAccelerate binary is missing but solve is being called. 
Report this issue") + A = cache.A + A = convert(AbstractMatrix, A) + + # Check if we have complex numbers + iscomplex = eltype(A) <: Complex + + if cache.isfresh + cacheval = @get_cacheval(cache, :AppleAccelerate32MixedLUFactorization) + # Convert to appropriate 32-bit type for factorization + if iscomplex + A_f32 = ComplexF32.(A) + else + A_f32 = Float32.(A) + end + res = aa_getrf!(A_f32; ipiv = cacheval[1].ipiv, info = cacheval[2]) + fact = LU(res[1:3]...), res[4] + cache.cacheval = fact + + if !LinearAlgebra.issuccess(fact[1]) + return SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = ReturnCode.Failure) + end + cache.isfresh = false + end + + A_lu, info = @get_cacheval(cache, :AppleAccelerate32MixedLUFactorization) + require_one_based_indexing(cache.u, cache.b) + m, n = size(A_lu, 1), size(A_lu, 2) + + # Convert b to appropriate 32-bit type for solving + if iscomplex + b_f32 = ComplexF32.(cache.b) + else + b_f32 = Float32.(cache.b) + end + + if m > n + Bc = copy(b_f32) + aa_getrs!('N', A_lu.factors, A_lu.ipiv, Bc; info) + # Convert back to original precision + T = eltype(cache.u) + cache.u .= T.(Bc[1:n]) + else + u_f32 = copy(b_f32) + aa_getrs!('N', A_lu.factors, A_lu.ipiv, u_f32; info) + # Convert back to original precision + T = eltype(cache.u) + cache.u .= T.(u_f32) + end + + SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = ReturnCode.Success) +end diff --git a/src/extension_algs.jl b/src/extension_algs.jl index f515ff855..f44a6d4c6 100644 --- a/src/extension_algs.jl +++ b/src/extension_algs.jl @@ -83,6 +83,35 @@ struct CudaOffloadLUFactorization <: AbstractFactorization end end +""" +`CUDAOffload32MixedLUFactorization()` + +A mixed precision GPU-accelerated LU factorization that converts matrices to Float32 +before offloading to CUDA GPU for factorization, then converts back for the solve. +This can provide speedups when the reduced precision is acceptable and memory +bandwidth is a bottleneck. + +## Performance Notes +- Converts Float64 matrices to Float32 for GPU factorization +- Can be significantly faster for large matrices where memory bandwidth is limiting +- May have reduced accuracy compared to full precision methods +- Most beneficial when the condition number of the matrix is moderate + +!!! note + + Using this solver requires adding the package CUDA.jl, i.e. `using CUDA` +""" +struct CUDAOffload32MixedLUFactorization <: AbstractFactorization + function CUDAOffload32MixedLUFactorization(; throwerror = true) + ext = Base.get_extension(@__MODULE__, :LinearSolveCUDAExt) + if ext === nothing && throwerror + error("CUDAOffload32MixedLUFactorization requires that CUDA is loaded, i.e. `using CUDA`") + else + return new() + end + end +end + """ `CudaOffloadQRFactorization()` @@ -650,6 +679,48 @@ struct MetalLUFactorization <: AbstractFactorization end end +""" + MetalOffload32MixedLUFactorization() + +A mixed precision Metal GPU-accelerated LU factorization that converts matrices to Float32 +before offloading to Metal GPU for factorization, then converts back for the solve. +This can provide speedups on Apple Silicon when reduced precision is acceptable. 
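+Both the factorization and the triangular solves are performed in Float32; the
+solution is converted back to the original element type afterwards.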
+ +## Performance Notes +- Converts Float64 matrices to Float32 for GPU factorization +- Can be significantly faster for large matrices where memory bandwidth is limiting +- Particularly effective on Apple Silicon Macs with unified memory architecture +- May have reduced accuracy compared to full precision methods + +## Requirements +Using this solver requires that Metal.jl is loaded: `using Metal` + +## Example +```julia +using Metal +alg = MetalOffload32MixedLUFactorization() +sol = solve(prob, alg) +``` +""" +struct MetalOffload32MixedLUFactorization <: AbstractFactorization + function MetalOffload32MixedLUFactorization(; throwerror = true) + @static if !Sys.isapple() + if throwerror + error("MetalOffload32MixedLUFactorization is only available on Apple platforms") + else + return new() + end + else + ext = Base.get_extension(@__MODULE__, :LinearSolveMetalExt) + if ext === nothing && throwerror + error("MetalOffload32MixedLUFactorization requires that Metal.jl is loaded, i.e. `using Metal`") + else + return new() + end + end + end +end + """ BLISLUFactorization() @@ -715,3 +786,51 @@ struct CUSOLVERRFFactorization <: AbstractSparseFactorization end end end + +""" + MKL32MixedLUFactorization() + +A mixed precision LU factorization using Intel MKL that performs factorization in Float32 +precision while maintaining Float64 interface. This can provide significant speedups +for large matrices when reduced precision is acceptable. + +## Performance Notes +- Converts Float64 matrices to Float32 for factorization +- Uses optimized MKL routines for the factorization +- Can be 2x faster than full precision for memory-bandwidth limited problems +- May have reduced accuracy compared to full Float64 precision + +## Requirements +This solver requires MKL to be available through MKL_jll. + +## Example +```julia +alg = MKL32MixedLUFactorization() +sol = solve(prob, alg) +``` +""" +struct MKL32MixedLUFactorization <: AbstractFactorization end + +""" + AppleAccelerate32MixedLUFactorization() + +A mixed precision LU factorization using Apple's Accelerate framework that performs +factorization in Float32 precision while maintaining Float64 interface. This can +provide significant speedups on Apple hardware when reduced precision is acceptable. + +## Performance Notes +- Converts Float64 matrices to Float32 for factorization +- Uses optimized Accelerate routines for the factorization +- Particularly effective on Apple Silicon with unified memory +- May have reduced accuracy compared to full Float64 precision + +## Requirements +This solver is only available on Apple platforms and requires the Accelerate framework. + +## Example +```julia +alg = AppleAccelerate32MixedLUFactorization() +sol = solve(prob, alg) +``` +""" +struct AppleAccelerate32MixedLUFactorization <: AbstractFactorization end diff --git a/src/mkl.jl b/src/mkl.jl index 02a8bdba6..0b216c119 100644 --- a/src/mkl.jl +++ b/src/mkl.jl @@ -8,6 +8,7 @@ to avoid allocations and does not require libblastrampoline. 
""" struct MKLLUFactorization <: AbstractFactorization end + function getrf!(A::AbstractMatrix{<:ComplexF64}; ipiv = similar(A, BlasInt, min(size(A, 1), size(A, 2))), info = Ref{BlasInt}(), @@ -241,3 +242,79 @@ function SciMLBase.solve!(cache::LinearCache, alg::MKLLUFactorization; SciMLBase.build_linear_solution(alg, cache.u, nothing, cache; retcode = ReturnCode.Success) end + +# Mixed precision MKL implementation +default_alias_A(::MKL32MixedLUFactorization, ::Any, ::Any) = false +default_alias_b(::MKL32MixedLUFactorization, ::Any, ::Any) = false + +const PREALLOCATED_MKL32_LU = begin + A = rand(Float32, 0, 0) + luinst = ArrayInterface.lu_instance(A), Ref{BlasInt}() +end + +function LinearSolve.init_cacheval(alg::MKL32MixedLUFactorization, A, b, u, Pl, Pr, + maxiters::Int, abstol, reltol, verbose::LinearVerbosity, + assumptions::OperatorAssumptions) + # Pre-allocate appropriate 32-bit arrays based on input type + if eltype(A) <: Complex + A_32 = rand(ComplexF32, 0, 0) + else + A_32 = rand(Float32, 0, 0) + end + ArrayInterface.lu_instance(A_32), Ref{BlasInt}() +end + +function SciMLBase.solve!(cache::LinearCache, alg::MKL32MixedLUFactorization; + kwargs...) + A = cache.A + A = convert(AbstractMatrix, A) + + # Check if we have complex numbers + iscomplex = eltype(A) <: Complex + + if cache.isfresh + cacheval = @get_cacheval(cache, :MKL32MixedLUFactorization) + # Convert to appropriate 32-bit type for factorization + if iscomplex + A_f32 = ComplexF32.(A) + else + A_f32 = Float32.(A) + end + res = getrf!(A_f32; ipiv = cacheval[1].ipiv, info = cacheval[2]) + fact = LU(res[1:3]...), res[4] + cache.cacheval = fact + + if !LinearAlgebra.issuccess(fact[1]) + return SciMLBase.build_linear_solution( + alg, cache.u, nothing, cache; retcode = ReturnCode.Failure) + end + cache.isfresh = false + end + + A_lu, info = @get_cacheval(cache, :MKL32MixedLUFactorization) + require_one_based_indexing(cache.u, cache.b) + m, n = size(A_lu, 1), size(A_lu, 2) + + # Convert b to appropriate 32-bit type for solving + if iscomplex + b_f32 = ComplexF32.(cache.b) + else + b_f32 = Float32.(cache.b) + end + + if m > n + Bc = copy(b_f32) + getrs!('N', A_lu.factors, A_lu.ipiv, Bc; info) + # Convert back to original precision + T = eltype(cache.u) + cache.u .= T.(Bc[1:n]) + else + u_f32 = copy(b_f32) + getrs!('N', A_lu.factors, A_lu.ipiv, u_f32; info) + # Convert back to original precision + T = eltype(cache.u) + cache.u .= T.(u_f32) + end + + SciMLBase.build_linear_solution(alg, cache.u, nothing, cache; retcode = ReturnCode.Success) +end diff --git a/test/runtests.jl b/test/runtests.jl index 048cdf61e..558f17c7b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -19,6 +19,7 @@ if GROUP == "All" || GROUP == "Core" @time @safetestset "Traits" include("traits.jl") @time @safetestset "Verbosity" include("verbosity.jl") @time @safetestset "BandedMatrices" include("banded.jl") + @time @safetestset "Mixed Precision" include("test_mixed_precision.jl") end # Don't run Enzyme tests on prerelease diff --git a/test/test_mixed_precision.jl b/test/test_mixed_precision.jl new file mode 100644 index 000000000..47a34e4af --- /dev/null +++ b/test/test_mixed_precision.jl @@ -0,0 +1,77 @@ +using Test +using LinearSolve +using LinearAlgebra +using Random + +Random.seed!(123) + +@testset "Mixed Precision LU Factorizations" begin + n = 100 + A = rand(Float64, n, n) + b = rand(Float64, n) + + # Make A better conditioned to avoid excessive precision loss + A = A + 5.0 * I + + prob = LinearProblem(A, b) + + # Reference solution with full 
precision + sol_ref = solve(prob, LUFactorization()) + + @testset "MKL32MixedLUFactorization" begin + if LinearSolve.usemkl + sol_mixed = solve(prob, MKL32MixedLUFactorization()) + @test sol_mixed.retcode == ReturnCode.Success + # Check that solution is reasonably close (allowing for reduced precision) + @test norm(sol_mixed.u - sol_ref.u) / norm(sol_ref.u) < 1e-5 + # Verify it actually solves the system + @test norm(A * sol_mixed.u - b) / norm(b) < 1e-5 + else + @test_skip "MKL not available" + end + end + + @testset "AppleAccelerate32MixedLUFactorization" begin + if Sys.isapple() + sol_mixed = solve(prob, AppleAccelerate32MixedLUFactorization()) + @test sol_mixed.retcode == ReturnCode.Success + # Check that solution is reasonably close (allowing for reduced precision) + @test norm(sol_mixed.u - sol_ref.u) / norm(sol_ref.u) < 1e-5 + # Verify it actually solves the system + @test norm(A * sol_mixed.u - b) / norm(b) < 1e-5 + else + @test_skip "Apple Accelerate not available" + end + end + + @testset "Complex matrices" begin + # Test with complex matrices + A_complex = rand(ComplexF64, n, n) + 5.0 * I + b_complex = rand(ComplexF64, n) + prob_complex = LinearProblem(A_complex, b_complex) + sol_ref_complex = solve(prob_complex, LUFactorization()) + + if LinearSolve.usemkl + sol_mixed = solve(prob_complex, MKL32MixedLUFactorization()) + @test sol_mixed.retcode == ReturnCode.Success + @test norm(sol_mixed.u - sol_ref_complex.u) / norm(sol_ref_complex.u) < 1e-5 + end + + if Sys.isapple() + sol_mixed = solve(prob_complex, AppleAccelerate32MixedLUFactorization()) + @test sol_mixed.retcode == ReturnCode.Success + @test norm(sol_mixed.u - sol_ref_complex.u) / norm(sol_ref_complex.u) < 1e-5 + end + end +end + +# Note: CUDA and Metal tests would require those packages to be loaded +# and appropriate hardware to be available +@testset "GPU Mixed Precision (Mocked)" begin + @test isdefined(LinearSolve, :CUDAOffload32MixedLUFactorization) + @test isdefined(LinearSolve, :MetalOffload32MixedLUFactorization) + + # These would error without the appropriate packages loaded, which is expected + @test_throws Exception CUDAOffload32MixedLUFactorization() + @test_throws Exception MetalOffload32MixedLUFactorization() +end \ No newline at end of file