diff --git a/.github/workflows/Tests.yml b/.github/workflows/Tests.yml index 707a92bde..12d57f276 100644 --- a/.github/workflows/Tests.yml +++ b/.github/workflows/Tests.yml @@ -36,6 +36,7 @@ jobs: - "LinearSolveHYPRE" - "LinearSolvePardiso" - "NoPre" + - "LinearSolveAutotune" os: - ubuntu-latest - macos-latest diff --git a/.gitignore b/.gitignore index 1b6ed4dea..67196334e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.jl.mem /docs/build/ Manifest.toml +LocalPreferences.toml *.swp .vscode diff --git a/docs/Project.toml b/docs/Project.toml index 98c425db3..3bca6cddf 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,9 +1,11 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +LinearSolveAutotune = "67398393-80e8-4254-b7e4-1b9a36a3c5b6" SciMLOperators = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" [compat] Documenter = "1" LinearSolve = "1, 2, 3" +LinearSolveAutotune = "0.1" SciMLOperators = "1" diff --git a/docs/pages.jl b/docs/pages.jl index baeff4e84..e16ee1657 100644 --- a/docs/pages.jl +++ b/docs/pages.jl @@ -5,7 +5,8 @@ pages = ["index.md", "Tutorials" => Any[ "tutorials/caching_interface.md", "tutorials/accelerating_choices.md", - "tutorials/gpu.md"], + "tutorials/gpu.md", + "tutorials/autotune.md"], "Basics" => Any["basics/LinearProblem.md", "basics/common_solver_opts.md", "basics/OperatorAssumptions.md", diff --git a/docs/src/tutorials/autotune.md b/docs/src/tutorials/autotune.md new file mode 100644 index 000000000..40234de91 --- /dev/null +++ b/docs/src/tutorials/autotune.md @@ -0,0 +1,369 @@ +# Automatic Algorithm Selection with LinearSolveAutotune + +LinearSolve.jl includes an automatic tuning system that benchmarks all available linear algebra algorithms on your specific hardware and automatically selects optimal algorithms for different problem sizes and data types. 
This tutorial will show you how to use the `LinearSolveAutotune` sublibrary to optimize your linear solve performance. + +## Quick Start + +The simplest way to use the autotuner is to run it with default settings: + +```julia +using LinearSolve +using LinearSolveAutotune + +# Run autotune with default settings +results = autotune_setup() +``` + +This will: +- Benchmark 4 element types: `Float32`, `Float64`, `ComplexF32`, `ComplexF64` +- Test matrix sizes from small (4×4) to medium (500×500) +- Create performance plots for each element type +- Set preferences for optimal algorithm selection +- Share results with the community (if desired) + +## Understanding the Results + +The autotune process returns benchmark results and creates several outputs: + +```julia +# Basic usage returns just the DataFrame of results +results = autotune_setup(make_plot=false) + +# With plotting enabled, returns (DataFrame, Plots) +results, plots = autotune_setup(make_plot=true) + +# Examine the results +println("Algorithms tested: ", unique(results.algorithm)) +println("Element types: ", unique(results.eltype)) +println("Size range: ", minimum(results.size), " to ", maximum(results.size)) +``` + +## Customizing the Autotune Process + +### Element Types + +You can specify which element types to benchmark: + +```julia +# Test only Float64 and ComplexF64 +results = autotune_setup(eltypes = (Float64, ComplexF64)) + +# Test arbitrary precision types (excludes BLAS algorithms) +results = autotune_setup(eltypes = (BigFloat,), telemetry = false) + +# Test high precision float +results = autotune_setup(eltypes = (Float64, BigFloat)) +``` + +### Matrix Sizes + +Control the range of matrix sizes tested: + +```julia +# Default: small to medium matrices (4×4 to 500×500) +results = autotune_setup(large_matrices = false) + +# Large matrices: includes sizes up to 10,000×10,000 (good for GPU systems) +results = autotune_setup(large_matrices = true) +``` + +### Benchmark Quality vs Speed + +Adjust the 
thoroughness of benchmarking: + +```julia +# Quick benchmark (fewer samples, less time per test) +results = autotune_setup(samples = 1, seconds = 0.1) + +# Thorough benchmark (more samples, more time per test) +results = autotune_setup(samples = 10, seconds = 2.0) +``` + +### Privacy and Telemetry + +Control data sharing: + +```julia +# Disable telemetry (no data shared) +results = autotune_setup(telemetry = false) + +# Disable preference setting (just benchmark, don't change defaults) +results = autotune_setup(set_preferences = false) + +# Disable plotting (faster, less output) +results = autotune_setup(make_plot = false) +``` + +### Missing Algorithm Handling + +By default, autotune is assertive about finding all expected algorithms: + +```julia +# Default behavior: error if expected algorithms are missing +results = autotune_setup() # Will error if RFLUFactorization missing + +# Allow missing algorithms (useful for incomplete setups) +results = autotune_setup(skip_missing_algs = true) # Will warn instead of error +``` + +**When algorithms might be missing:** +- RFLUFactorization should always be available (hard dependency) +- GPU algorithms require CUDA.jl or Metal.jl to be loaded +- Apple Accelerate should work on macOS systems +- MKL algorithms require MKL.jl package + +## Understanding Algorithm Compatibility + +The autotuner automatically detects which algorithms work with which element types: + +### Standard Types (Float32, Float64, ComplexF32, ComplexF64) +- **LUFactorization**: Fast BLAS-based LU decomposition +- **MKLLUFactorization**: Intel MKL optimized (if available) +- **AppleAccelerateLUFactorization**: Apple Accelerate optimized (on macOS) +- **RFLUFactorization**: Recursive factorization (cache-friendly) +- **GenericLUFactorization**: Pure Julia implementation +- **SimpleLUFactorization**: Simple pure Julia LU + +### Arbitrary Precision Types (BigFloat, Rational, etc.) 
+Only pure Julia algorithms work: +- **GenericLUFactorization**: ✅ Compatible +- **RFLUFactorization**: ✅ Compatible +- **SimpleLUFactorization**: ✅ Compatible +- **LUFactorization**: ❌ Excluded (requires BLAS) + +## GPU Systems + +On systems with CUDA or Metal GPU support, the autotuner will automatically detect and benchmark GPU algorithms: + +```julia +# Enable large matrix testing for GPUs +results = autotune_setup(large_matrices = true, samples = 3, seconds = 1.0) +``` + +GPU algorithms tested (when available): +- **CudaOffloadFactorization**: CUDA GPU acceleration +- **MetalLUFactorization**: Apple Metal GPU acceleration + +## Working with Results + +### Examining Performance Data + +```julia +using DataFrames +using Statistics + +results = autotune_setup(make_plot = false) + +# Filter successful results +successful = filter(row -> row.success, results) + +# Summary by algorithm +summary = combine(groupby(successful, [:algorithm, :eltype]), + :gflops => mean => :avg_gflops, + :gflops => maximum => :max_gflops) +sort!(summary, :avg_gflops, rev=true) +println(summary) +``` + +### Performance Plots + +When `make_plot=true`, you get separate plots for each element type: + +```julia +results, plots = autotune_setup() + +# plots is a dictionary keyed by element type +for (eltype, plot) in plots + println("Plot for $eltype available") + # Plots are automatically saved as PNG and PDF files +end +``` + +### Preferences Integration + +The autotuner sets preferences that LinearSolve.jl uses for automatic algorithm selection: + +```julia +using LinearSolveAutotune + +# View current preferences +LinearSolveAutotune.show_current_preferences() + +# Clear all autotune preferences +LinearSolveAutotune.clear_algorithm_preferences() + +# Set custom preferences +custom_categories = Dict( + "Float64_0-128" => "RFLUFactorization", + "Float64_128-256" => "LUFactorization" +) +LinearSolveAutotune.set_algorithm_preferences(custom_categories) +``` + +## Real-World Examples + +### 
High-Performance Computing + +```julia +# For HPC clusters with large problems +results = autotune_setup( + large_matrices = true, + samples = 5, + seconds = 1.0, + eltypes = (Float64, ComplexF64), + telemetry = false # Privacy on shared systems +) +``` + +### Workstation with GPU + +```julia +# Comprehensive benchmark including GPU algorithms +results = autotune_setup( + large_matrices = true, + samples = 3, + seconds = 0.5, + eltypes = (Float32, Float64, ComplexF32, ComplexF64) +) +``` + +### Research with Arbitrary Precision + +```julia +# Testing arbitrary precision arithmetic +results = autotune_setup( + eltypes = (Float64, BigFloat), + samples = 2, + seconds = 0.2, # BigFloat is slow + telemetry = false, + large_matrices = false +) +``` + +### Quick Development Testing + +```julia +# Fast benchmark for development/testing +results = autotune_setup( + samples = 1, + seconds = 0.05, + eltypes = (Float64,), + make_plot = false, + telemetry = false, + set_preferences = false +) +``` + +## How Preferences Affect LinearSolve.jl + +After running autotune, LinearSolve.jl will automatically use the optimal algorithms: + +```julia +using LinearSolve + +# This will now use the algorithm determined by autotune +A = rand(100, 100) # Float64 matrix in 0-128 size range +b = rand(100) +prob = LinearProblem(A, b) +sol = solve(prob) # Uses auto-selected optimal algorithm + +# For different sizes, different optimal algorithms may be used +A_large = rand(300, 300) # Different size range +b_large = rand(300) +prob_large = LinearProblem(A_large, b_large) +sol_large = solve(prob_large) # May use different algorithm +``` + +## Best Practices + +1. **Run autotune once per system**: Results are system-specific and should be rerun when hardware changes. + +2. **Use appropriate matrix sizes**: Set `large_matrices=true` only if you regularly solve large systems. + +3. **Consider element types**: Only benchmark the types you actually use to save time. + +4. 
**Benchmark thoroughly for production**: Use higher `samples` and `seconds` values for production systems. + +5. **Respect privacy**: Disable telemetry on sensitive or proprietary systems. + +6. **Save results**: The DataFrame returned contains valuable performance data for analysis. + +## Troubleshooting + +### No Algorithms Available +If you get "No algorithms found", ensure LinearSolve.jl is properly installed: +```julia +using Pkg +Pkg.test("LinearSolve") +``` + +### GPU Algorithms Missing +GPU algorithms require additional packages: +```julia +# For CUDA +using CUDA, LinearSolve + +# For Metal (Apple Silicon) +using Metal, LinearSolve +``` + +### Preferences Not Applied +Restart Julia after running autotune for preferences to take effect, or check: +```julia +LinearSolveAutotune.show_current_preferences() +``` + +### Slow BigFloat Performance +This is expected - arbitrary precision arithmetic is much slower than hardware floating point. Consider using `DoubleFloats.jl` or `MultiFloats.jl` for better performance if extreme precision isn't required. + +## Community and Telemetry + +By default, autotune results are shared with the LinearSolve.jl community via public GitHub gists to help improve algorithm selection for everyone. The shared data includes: + +- System information (OS, CPU, core count, etc.) +- Algorithm performance results +- NO personal information or sensitive data + +Results are uploaded as public gists that can be easily searched and viewed by the community. + +### GitHub Authentication for Telemetry + +When telemetry is enabled, the system will prompt you to set up GitHub authentication if not already configured: + +```julia +# This will prompt for GitHub token setup if GITHUB_TOKEN not found +results = autotune_setup(telemetry = true) +``` + +The system will wait for you to create and paste a GitHub token. This helps the community by sharing performance data across different hardware configurations via easily discoverable GitHub gists. 
+ +**Interactive Setup:** +The autotune process will show step-by-step instructions and wait for you to: +1. Create a GitHub token at the provided link +2. Paste the token when prompted +3. Proceed with benchmarking and automatic result sharing + +**Alternative - Pre-setup Environment Variable**: +```bash +export GITHUB_TOKEN=your_token_here +julia +``` + +**Creating the GitHub Token:** +1. Open [https://github.com/settings/tokens?type=beta](https://github.com/settings/tokens?type=beta) +2. Click "Generate new token" +3. Set name: "LinearSolve Autotune" +4. Set expiration: 90 days +5. Repository access: "Public Repositories (read-only)" +6. Generate and copy the token + +### Disabling Telemetry + +You can disable telemetry completely: + +```julia +# No authentication required +results = autotune_setup(telemetry = false) +``` + +This helps the community understand performance across different hardware configurations and improves the default algorithm selection for future users, but participation is entirely optional. 
\ No newline at end of file diff --git a/lib/LinearSolveAutotune/Project.toml b/lib/LinearSolveAutotune/Project.toml new file mode 100644 index 000000000..8c0ac0e6d --- /dev/null +++ b/lib/LinearSolveAutotune/Project.toml @@ -0,0 +1,50 @@ +name = "LinearSolveAutotune" +uuid = "67398393-80e8-4254-b7e4-1b9a36a3c5b6" +authors = ["SciML"] +version = "0.1.0" + +[deps] +LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +GitHub = "bc5e4493-9b4d-5f90-b8aa-2b2bcaad7a26" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" +Preferences = "21216c6a-2e73-6563-6e65-726566657250" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +RecursiveFactorization = "f2c3362d-daeb-58d1-803e-2bc74f2840b4" +BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + +[weakdeps] +BLIS = "238ceb6f-8488-4382-8f3b-76d2b52b7899" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" + +[compat] +LinearSolve = "3" +BenchmarkTools = "1" +Base64 = "1" +DataFrames = "1" +GitHub = "5" +Plots = "1" +PrettyTables = "2" +Preferences = "1" +Statistics = "1" +Random = "1" +LinearAlgebra = "1" +Printf = "1" +Dates = "1" +Test = "1" +RecursiveFactorization = "0.2" +BLIS = "0.1" +CUDA = "5" +Metal = "1" +julia = "1.10" \ No newline at end of file diff --git a/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl new file mode 100644 index 000000000..92d6728f1 --- /dev/null +++ 
b/lib/LinearSolveAutotune/src/LinearSolveAutotune.jl @@ -0,0 +1,235 @@ +module LinearSolveAutotune + +using LinearSolve +using BenchmarkTools +using DataFrames +using PrettyTables +using Preferences +using Statistics +using Random +using LinearAlgebra +using Printf +using Dates +using Base64 +using RecursiveFactorization # Hard dependency to ensure RFLUFactorization is available + +# Optional dependencies for telemetry and plotting +using GitHub +using Plots + +# Load JLL packages when available for better library access +const BLIS_JLL_AVAILABLE = Ref(false) +const LAPACK_JLL_AVAILABLE = Ref(false) + +function __init__() + # Try to load JLL packages at runtime for enhanced BLIS support + try + # Check if BLIS_jll is available in the current environment + if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll")) + BLIS_JLL_AVAILABLE[] = true + @info "BLIS_jll detected - enhanced BLIS library access available" + else + @eval using BLIS_jll + BLIS_JLL_AVAILABLE[] = true + @info "BLIS_jll loaded for enhanced BLIS library access" + end + catch + @debug "BLIS_jll not available, BLISLUFactorization may not work" + end + + try + # Check if LAPACK_jll is available in the current environment + if haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll")) + LAPACK_JLL_AVAILABLE[] = true + @info "LAPACK_jll detected - enhanced LAPACK library access available" + else + @eval using LAPACK_jll + LAPACK_JLL_AVAILABLE[] = true + @info "LAPACK_jll loaded for enhanced LAPACK library access" + end + catch + @debug "LAPACK_jll not available, some BLIS functionality may be limited" + end +end + +export autotune_setup + +include("algorithms.jl") +include("gpu_detection.jl") +include("benchmarking.jl") +include("plotting.jl") +include("telemetry.jl") +include("preferences.jl") + +""" + autotune_setup(; + large_matrices::Bool = false, + telemetry::Bool = true, + make_plot::Bool = true, + 
set_preferences::Bool = true, + samples::Int = 5, + seconds::Float64 = 0.5, + eltypes = (Float32, Float64, ComplexF32, ComplexF64), + skip_missing_algs::Bool = false) + +Run a comprehensive benchmark of all available LU factorization methods and optionally: + + - Create performance plots for each element type + - Create GitHub issue with telemetry data for community collection + - Set Preferences for optimal algorithm selection + - Support both CPU and GPU algorithms based on hardware detection + - Test algorithm compatibility with different element types + +# Arguments + + - `large_matrices::Bool = false`: Include larger matrix sizes for GPU benchmarking + - `telemetry::Bool = true`: Create GitHub issue with results for community data collection + - `make_plot::Bool = true`: Generate performance plots for each element type + - `set_preferences::Bool = true`: Update LinearSolve preferences with optimal algorithms + - `samples::Int = 5`: Number of benchmark samples per algorithm/size + - `seconds::Float64 = 0.5`: Maximum time per benchmark + - `eltypes = (Float32, Float64, ComplexF32, ComplexF64)`: Element types to benchmark + - `skip_missing_algs::Bool = false`: If false, error when expected algorithms are missing; if true, warn instead + +# Returns + + - `DataFrame`: Detailed benchmark results with performance data for all element types + - `Dict` or `Plot`: Performance visualizations by element type (if `make_plot=true`) + +# Examples + +```julia +using LinearSolve +using LinearSolveAutotune + +# Basic autotune with default settings (4 element types) +results = autotune_setup() + +# Custom autotune for GPU systems with larger matrices +results = autotune_setup(large_matrices = true, samples = 10, seconds = 1.0) + +# Autotune with only Float64 and ComplexF64 +results = autotune_setup(eltypes = (Float64, ComplexF64)) + +# Test with BigFloat (note: most BLAS algorithms will be excluded) +results = autotune_setup(eltypes = (BigFloat,), telemetry = false) + +# Allow 
missing algorithms (useful for incomplete setups) +results = autotune_setup(skip_missing_algs = true) +``` +""" +function autotune_setup(; + large_matrices::Bool = true, + telemetry::Bool = true, + make_plot::Bool = true, + set_preferences::Bool = true, + samples::Int = 5, + seconds::Float64 = 0.5, + eltypes = (Float32, Float64, ComplexF32, ComplexF64), + skip_missing_algs::Bool = false) + @info "Starting LinearSolve.jl autotune setup..." + @info "Configuration: large_matrices=$large_matrices, telemetry=$telemetry, make_plot=$make_plot, set_preferences=$set_preferences" + @info "Element types to benchmark: $(join(eltypes, ", "))" + + # Set up GitHub authentication early if telemetry is enabled + github_auth = nothing + if telemetry + @info "🔗 Checking GitHub authentication for telemetry..." + github_auth = setup_github_authentication() + if github_auth === nothing + @info "📊 Continuing with benchmarking (results will be saved locally)" + end + end + + # Get system information + system_info = get_system_info() + @info "System detected: $(system_info["os"]) $(system_info["arch"]) with $(system_info["num_cores"]) cores" + + # Get available algorithms + cpu_algs, cpu_names = get_available_algorithms(; skip_missing_algs = skip_missing_algs) + @info "Found $(length(cpu_algs)) CPU algorithms: $(join(cpu_names, ", "))" + + # Add GPU algorithms if available + gpu_algs, gpu_names = get_gpu_algorithms(; skip_missing_algs = skip_missing_algs) + if !isempty(gpu_algs) + @info "Found $(length(gpu_algs)) GPU algorithms: $(join(gpu_names, ", "))" + end + + # Combine all algorithms + all_algs = vcat(cpu_algs, gpu_algs) + all_names = vcat(cpu_names, gpu_names) + + if isempty(all_algs) + error("No algorithms found! 
This shouldn't happen.") + end + + # Get benchmark sizes + sizes = collect(get_benchmark_sizes(large_matrices)) + @info "Benchmarking $(length(sizes)) matrix sizes from $(minimum(sizes)) to $(maximum(sizes))" + + # Run benchmarks + @info "Running benchmarks (this may take several minutes)..." + results_df = benchmark_algorithms(sizes, all_algs, all_names, eltypes; + samples = samples, seconds = seconds, large_matrices = large_matrices) + + # Display results table + successful_results = filter(row -> row.success, results_df) + if nrow(successful_results) > 0 + @info "Benchmark completed successfully!" + + # Create summary table for display + summary = combine(groupby(successful_results, :algorithm), + :gflops => mean => :avg_gflops, + :gflops => maximum => :max_gflops, + nrow => :num_tests) + sort!(summary, :avg_gflops, rev = true) + + println("\n" * "="^60) + println("BENCHMARK RESULTS SUMMARY") + println("="^60) + pretty_table(summary, + header = ["Algorithm", "Avg GFLOPs", "Max GFLOPs", "Tests"], + formatters = ft_printf("%.2f", [2, 3]), + crop = :none) + else + @warn "No successful benchmark results!" + return results_df, nothing + end + + # Categorize results and find best algorithms per size range + categories = categorize_results(results_df) + + # Set preferences if requested + if set_preferences && !isempty(categories) + set_algorithm_preferences(categories) + end + + # Create plots if requested + plots_dict = nothing + plot_files = nothing + if make_plot + @info "Creating performance plots..." + plots_dict = create_benchmark_plots(results_df) + if !isempty(plots_dict) + plot_files = save_benchmark_plots(plots_dict) + end + end + + # Create GitHub issue with telemetry if requested + if telemetry && nrow(successful_results) > 0 + @info "📤 Creating GitHub issue with benchmark data for community collection..." 
+ markdown_content = format_results_for_github(results_df, system_info, categories) + upload_to_github(markdown_content, plot_files, github_auth, results_df, system_info, categories) + end + + @info "Autotune setup completed!" + + # Return results and plots + if make_plot && plots_dict !== nothing && !isempty(plots_dict) + return results_df, plots_dict + else + return results_df + end +end + +end diff --git a/lib/LinearSolveAutotune/src/algorithms.jl b/lib/LinearSolveAutotune/src/algorithms.jl new file mode 100644 index 000000000..42c76f5b3 --- /dev/null +++ b/lib/LinearSolveAutotune/src/algorithms.jl @@ -0,0 +1,170 @@ +# Algorithm detection and creation functions + +""" + get_available_algorithms(; skip_missing_algs::Bool = false) + +Returns a list of available LU factorization algorithms based on the system and loaded packages. +If skip_missing_algs=false, errors when expected algorithms are missing; if true, warns instead. +""" +function get_available_algorithms(; skip_missing_algs::Bool = false) + algs = [] + alg_names = String[] + + # Core algorithms always available + push!(algs, LUFactorization()) + push!(alg_names, "LUFactorization") + + push!(algs, GenericLUFactorization()) + push!(alg_names, "GenericLUFactorization") + + # MKL if available + if LinearSolve.usemkl + push!(algs, MKLLUFactorization()) + push!(alg_names, "MKLLUFactorization") + end + + # Apple Accelerate if available (should be available on macOS) + if LinearSolve.appleaccelerate_isavailable() + push!(algs, AppleAccelerateLUFactorization()) + push!(alg_names, "AppleAccelerateLUFactorization") + else + # Check if we're on macOS and Apple Accelerate should be available + if Sys.isapple() && !skip_missing_algs + msg = "macOS system detected but Apple Accelerate not available. This is unexpected." 
+ @warn msg + end + end + + # BLIS if JLL packages are available and hardware supports it + try + # Check if BLIS_jll and LAPACK_jll are available, which enable BLISLUFactorization + blis_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll")) + lapack_jll_available = haskey(Base.loaded_modules, Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll")) + + if (blis_jll_available || lapack_jll_available) && isdefined(LinearSolve, :BLISLUFactorization) && hasmethod(LinearSolve.BLISLUFactorization, ()) + # Test if BLIS works on this hardware + try + test_alg = LinearSolve.BLISLUFactorization() + # Simple test to see if it can be created + push!(algs, test_alg) + push!(alg_names, "BLISLUFactorization") + catch e + msg = "BLISLUFactorization available but not supported on this hardware: $e" + if skip_missing_algs + @warn msg + else + @info msg # BLIS hardware incompatibility is not an error, just info + end + end + else + if blis_jll_available || lapack_jll_available + msg = "BLIS_jll/LAPACK_jll loaded but BLISLUFactorization not available in LinearSolve" + else + msg = "BLIS_jll and LAPACK_jll not loaded - BLISLUFactorization requires these JLL packages" + end + if skip_missing_algs + @warn msg + else + @info msg # Not having BLIS JLL packages is not an error + end + end + catch e + msg = "Error checking BLIS JLL package availability: $e" + if skip_missing_algs + @warn msg + else + @info msg + end + end + + # RecursiveFactorization - should always be available as it's a hard dependency + try + if LinearSolve.userecursivefactorization(nothing) + push!(algs, RFLUFactorization()) + push!(alg_names, "RFLUFactorization") + else + msg = "RFLUFactorization should be available (RecursiveFactorization.jl is a hard dependency)" + if skip_missing_algs + @warn msg + else + error(msg * ". 
Pass `skip_missing_algs=true` to continue with warning instead.") + end + end + catch e + msg = "RFLUFactorization failed to load: $e" + if skip_missing_algs + @warn msg + else + error(msg * ". Pass `skip_missing_algs=true` to continue with warning instead.") + end + end + + # SimpleLU always available + push!(algs, SimpleLUFactorization()) + push!(alg_names, "SimpleLUFactorization") + + return algs, alg_names +end + +""" + get_gpu_algorithms(; skip_missing_algs::Bool = false) + +Returns GPU-specific algorithms if GPU hardware and packages are available. +If skip_missing_algs=false, errors when GPU hardware is detected but algorithms are missing; if true, warns instead. +""" +function get_gpu_algorithms(; skip_missing_algs::Bool = false) + gpu_algs = [] + gpu_names = String[] + + # CUDA algorithms + if is_cuda_available() + try + push!(gpu_algs, CudaOffloadFactorization()) + push!(gpu_names, "CudaOffloadFactorization") + catch e + msg = "CUDA hardware detected but CudaOffloadFactorization not available: $e. Load CUDA.jl package." + if skip_missing_algs + @warn msg + else + error(msg * " Pass `skip_missing_algs=true` to continue with warning instead.") + end + end + end + + # Metal algorithms for Apple Silicon + if is_metal_available() + try + push!(gpu_algs, MetalLUFactorization()) + push!(gpu_names, "MetalLUFactorization") + catch e + msg = "Metal hardware detected but MetalLUFactorization not available: $e. Load Metal.jl package." + if skip_missing_algs + @warn msg + else + error(msg * " Pass `skip_missing_algs=true` to continue with warning instead.") + end + end + end + + return gpu_algs, gpu_names +end + +""" + luflop(m, n=m; innerflop=2) + +Calculate the number of floating point operations for LU factorization. +From the existing LinearSolve benchmarks. +""" +function luflop(m, n = m; innerflop = 2) + sum(1:min(m, n)) do k + invflop = 1 + scaleflop = isempty((k + 1):m) ? 0 : sum((k + 1):m) + updateflop = isempty((k + 1):n) ? 
0 : + sum((k + 1):n) do j + isempty((k + 1):m) ? 0 : sum((k + 1):m) do i + innerflop + end + end + invflop + scaleflop + updateflop + end +end diff --git a/lib/LinearSolveAutotune/src/benchmarking.jl b/lib/LinearSolveAutotune/src/benchmarking.jl new file mode 100644 index 000000000..3bdcd780b --- /dev/null +++ b/lib/LinearSolveAutotune/src/benchmarking.jl @@ -0,0 +1,251 @@ +# Core benchmarking functionality + +""" + test_algorithm_compatibility(alg, eltype::Type, test_size::Int=4) + +Test if an algorithm is compatible with a given element type. +Returns true if compatible, false otherwise. +Uses more strict rules for BLAS-dependent algorithms with non-standard types. +""" +function test_algorithm_compatibility(alg, eltype::Type, test_size::Int = 4) + # Get algorithm name for type-specific compatibility rules + alg_name = string(typeof(alg).name.name) + + # Define strict compatibility rules for BLAS-dependent algorithms + if eltype in [BigFloat, BigInt, Rational{Int}, Complex{BigFloat}] + # For arbitrary precision types, only allow pure Julia algorithms + if alg_name in ["LUFactorization", "MKLLUFactorization", "AppleAccelerateLUFactorization"] + return false # These rely on BLAS and shouldn't work with arbitrary precision + end + # SimpleLUFactorization, GenericLUFactorization, and RFLUFactorization should work (pure Julia) + elseif eltype in [Float16, Complex{Float16}] + # Float16 might not be supported by all BLAS + if alg_name in ["MKLLUFactorization", "AppleAccelerateLUFactorization"] + return false # These might not support Float16 + end + end + + # For standard types or algorithms that passed the strict check, test functionality + try + # Create a small test problem with the specified element type + rng = MersenneTwister(123) + A = rand(rng, eltype, test_size, test_size) + b = rand(rng, eltype, test_size) + u0 = rand(rng, eltype, test_size) + + prob = LinearProblem(A, b; u0 = u0) + + # Try to solve - if it works, the algorithm is compatible + sol = 
solve(prob, alg) + + # Additional check: verify the solution is actually of the expected type + if !isa(sol.u, AbstractVector{eltype}) + @debug "Algorithm $alg_name returned wrong element type for $eltype" + return false + end + + return true + + catch e + # Algorithm failed - not compatible with this element type + @debug "Algorithm $alg_name failed for $eltype: $e" + return false + end +end + +""" + filter_compatible_algorithms(algorithms, alg_names, eltype::Type) + +Filter algorithms to only those compatible with the given element type. +Returns filtered algorithms and names. +""" +function filter_compatible_algorithms(algorithms, alg_names, eltype::Type) + compatible_algs = [] + compatible_names = String[] + + @info "Testing algorithm compatibility with $(eltype)..." + + for (alg, name) in zip(algorithms, alg_names) + if test_algorithm_compatibility(alg, eltype) + push!(compatible_algs, alg) + push!(compatible_names, name) + @debug "✓ $name compatible with $eltype" + else + @debug "✗ $name not compatible with $eltype" + end + end + + @info "Found $(length(compatible_algs))/$(length(algorithms)) algorithms compatible with $eltype" + + return compatible_algs, compatible_names +end + +""" + benchmark_algorithms(sizes, algorithms, alg_names, eltypes; + samples=5, seconds=0.5, large_matrices=false) + +Benchmark the given algorithms across different matrix sizes and element types. +Returns a DataFrame with results including element type information. 
"""
function benchmark_algorithms(sizes, algorithms, alg_names, eltypes;
        samples = 5, seconds = 0.5, large_matrices = false)
    # NOTE: `large_matrices` is kept for interface compatibility; callers control the
    # actual sizes via `sizes` (see `get_benchmark_sizes`).

    # Save the current global benchmark parameters so they can be restored afterwards.
    # NOTE(fix): `BenchmarkTools.DEFAULT_PARAMETERS` is a single mutable object.
    # Saving the *reference* and then mutating its fields would also mutate the
    # "saved" copy, and rebinding another module's global is not permitted, so the
    # individual field values are saved and restored instead.
    old_seconds = BenchmarkTools.DEFAULT_PARAMETERS.seconds
    old_samples = BenchmarkTools.DEFAULT_PARAMETERS.samples
    BenchmarkTools.DEFAULT_PARAMETERS.seconds = seconds
    BenchmarkTools.DEFAULT_PARAMETERS.samples = samples

    # One NamedTuple per (size, algorithm, eltype) measurement; converted to a
    # DataFrame at the end.
    results_data = NamedTuple[]

    try
        for eltype in eltypes
            @info "Benchmarking with element type: $eltype"

            # Skip algorithms that cannot handle this element type
            # (e.g. BLAS-backed factorizations with BigFloat).
            compatible_algs, compatible_names = filter_compatible_algorithms(
                algorithms, alg_names, eltype)

            if isempty(compatible_algs)
                @warn "No algorithms compatible with $eltype, skipping..."
                continue
            end

            for n in sizes
                @info "Benchmarking $n × $n matrices with $eltype..."

                # Consistent seed so every algorithm sees the same matrices.
                rng = MersenneTwister(123)
                A = rand(rng, eltype, n, n)
                b = rand(rng, eltype, n)
                u0 = rand(rng, eltype, n)

                for (alg, name) in zip(compatible_algs, compatible_names)
                    gflops = 0.0
                    success = true
                    error_msg = ""

                    try
                        # Fresh copies: the aliasing flags allow solvers to
                        # overwrite A and b in place.
                        prob = LinearProblem(copy(A), copy(b);
                            u0 = copy(u0),
                            alias = LinearAliasSpecifier(alias_A = true, alias_b = true))

                        # Warmup run (triggers compilation)
                        solve(prob, alg)

                        # Actual benchmark.  NOTE(fix): `prob` must NOT be
                        # interpolated with `$` here — the benchmarked expression
                        # has to refer to the fresh problem built by `setup` for
                        # every sample, otherwise each sample reuses the (possibly
                        # overwritten) outer problem.
                        bench = @benchmark solve(prob, $alg) setup=(prob = LinearProblem(
                            copy($A), copy($b);
                            u0 = copy($u0),
                            alias = LinearAliasSpecifier(alias_A = true, alias_b = true)))

                        # Calculate GFLOPs from the fastest sample (times are in ns)
                        min_time_sec = minimum(bench.times) / 1e9
                        flops = luflop(n, n)
                        gflops = flops / min_time_sec / 1e9

                    catch e
                        success = false
                        error_msg = string(e)
                        @warn "Algorithm $name failed for size $n with $eltype: $error_msg"
                    end

                    # Store result with element type information
                    push!(results_data,
                        (
                            size = n,
                            algorithm = name,
                            eltype = string(eltype),
                            gflops = gflops,
                            success = success,
                            error = error_msg
                        ))
                end
            end
        end

    finally
        # Restore the global benchmark parameters field-by-field (see note above).
        BenchmarkTools.DEFAULT_PARAMETERS.seconds = old_seconds
        BenchmarkTools.DEFAULT_PARAMETERS.samples = old_samples
    end

    return DataFrame(results_data)
end

"""
    get_benchmark_sizes(large_matrices::Bool=false)

Get the matrix sizes to benchmark based on the `large_matrices` flag.
"""
function get_benchmark_sizes(large_matrices::Bool = false)
    if large_matrices
        # For GPU benchmarking, include much larger sizes up to 10000
        return vcat(4:8:128, 150:50:500, 600:100:1000,
            1200:200:2000, 2500:500:5000, 6000:1000:10000)
    else
        # Default sizes similar to existing benchmarks
        return vcat(4:8:128, 150:50:500)
    end
end

"""
    categorize_results(df::DataFrame)

Categorize the benchmark results into size ranges and find the best algorithm for each range and element type.
"""
function categorize_results(df::DataFrame)
    # Keep only runs that completed successfully.
    successful_df = filter(row -> row.success, df)

    if nrow(successful_df) == 0
        @warn "No successful benchmark results found!"
        return Dict{String, String}()
    end

    categories = Dict{String, String}()

    # Size buckets; result keys below have the form "<eltype>_<range>".
    ranges = [
        ("0-128", 1:128),
        ("128-256", 129:256),
        ("256-512", 257:512),
        ("512+", 513:10000)
    ]

    # Get unique element types
    eltypes = unique(successful_df.eltype)

    for eltype in eltypes
        @info "Categorizing results for element type: $eltype"

        # Filter results for this element type
        eltype_df = filter(row -> row.eltype == eltype, successful_df)

        if nrow(eltype_df) == 0
            continue
        end

        for (range_name, range) in ranges
            # Get results for this size range and element type
            range_df = filter(row -> row.size in range, eltype_df)

            if nrow(range_df) == 0
                continue
            end

            # Average GFLOPs per algorithm within this bucket
            avg_results = combine(groupby(range_df, :algorithm), :gflops => mean => :avg_gflops)

            # Find the best algorithm
            if nrow(avg_results) > 0
                best_idx = argmax(avg_results.avg_gflops)
                best_alg = avg_results.algorithm[best_idx]
                category_key = 
"$(eltype)_$(range_name)" + categories[category_key] = best_alg + @info "Best algorithm for $eltype size range $range_name: $best_alg ($(round(avg_results.avg_gflops[best_idx], digits=2)) GFLOPs avg)" + end + end + end + + return categories +end diff --git a/lib/LinearSolveAutotune/src/gpu_detection.jl b/lib/LinearSolveAutotune/src/gpu_detection.jl new file mode 100644 index 000000000..a3ff4bf06 --- /dev/null +++ b/lib/LinearSolveAutotune/src/gpu_detection.jl @@ -0,0 +1,335 @@ +# GPU hardware and package detection + +""" + is_cuda_available() + +Check if CUDA hardware and packages are available. +Issues warnings if CUDA hardware is detected but packages aren't loaded. +""" +function is_cuda_available() + # Check if CUDA extension is loaded + ext = Base.get_extension(LinearSolve, :LinearSolveCUDAExt) + if ext === nothing + # Check if we might have CUDA hardware but missing packages + try + # Try to detect NVIDIA GPUs via nvidia-smi or similar system indicators + if haskey(ENV, "CUDA_VISIBLE_DEVICES") || + (Sys.islinux() && isfile("/proc/driver/nvidia/version")) || + (Sys.iswindows() && success(`where nvidia-smi`)) + @warn "CUDA hardware may be available but CUDA.jl extension is not loaded. Consider adding `using CUDA` to enable GPU algorithms." + end + catch + # Silently continue if detection fails + end + return false + end + + # Check if we have CUDA.jl loaded + try + CUDA = Base.get_extension(LinearSolve, :LinearSolveCUDAExt).CUDA + return CUDA.functional() + catch + return false + end +end + +""" + is_metal_available() + +Check if Metal (Apple Silicon) hardware and packages are available. +Issues warnings if Metal hardware is detected but packages aren't loaded. 
+""" +function is_metal_available() + # Check if we're on macOS with Apple Silicon + if !Sys.isapple() + return false + end + + # Check if this is Apple Silicon + is_apple_silicon = Sys.ARCH == :aarch64 + + # Check if Metal extension is loaded + ext = Base.get_extension(LinearSolve, :LinearSolveMetalExt) + if ext === nothing + if is_apple_silicon + @warn "Apple Silicon hardware detected but Metal.jl extension is not loaded. Consider adding `using Metal` to enable GPU algorithms." + end + return false + end + + # Check if we have Metal.jl loaded and functional + try + Metal = Base.get_extension(LinearSolve, :LinearSolveMetalExt).Metal + return Metal.functional() + catch + return false + end +end + +""" + get_system_info() + +Get system information for telemetry reporting. +""" +function get_system_info() + info = Dict{String, Any}() + + info["julia_version"] = string(VERSION) + info["os"] = string(Sys.KERNEL) + info["arch"] = string(Sys.ARCH) + info["cpu_name"] = Sys.cpu_info()[1].model + info["num_cores"] = Sys.CPU_THREADS + info["num_threads"] = Threads.nthreads() + info["blas_vendor"] = string(LinearAlgebra.BLAS.vendor()) + info["has_cuda"] = is_cuda_available() + info["has_metal"] = is_metal_available() + + if LinearSolve.usemkl + info["mkl_available"] = true + else + info["mkl_available"] = false + end + + if LinearSolve.appleaccelerate_isavailable() + info["apple_accelerate_available"] = true + else + info["apple_accelerate_available"] = false + end + + return info +end + +""" + get_detailed_system_info() + +Returns a comprehensive DataFrame with detailed system information suitable for CSV export. +Includes versioninfo() details and hardware-specific information for analysis. 
"""
function get_detailed_system_info()
    # One flat key => value map; converted to a single-row DataFrame at the end.
    system_data = Dict{String, Any}()

    # Evaluate `probe()` and return its value, or `default` if anything throws.
    # Nearly every query below can fail on unusual platforms or Julia versions,
    # so failures are recorded as the fallback value instead of aborting the
    # report.  This replaces ~20 copy-pasted try/catch blocks.
    safe(probe, default = "unknown") = try
        probe()
    catch
        default
    end

    # Julia and system basics
    system_data["timestamp"] = safe(() -> string(Dates.now()))
    system_data["julia_version"] = safe(() -> string(VERSION))
    system_data["julia_commit"] = safe(() -> Base.GIT_VERSION_INFO.commit[1:10])  # short hash
    system_data["os_name"] = safe() do
        Sys.iswindows() ? "Windows" :
        Sys.islinux() ? "Linux" :
        Sys.isapple() ? "macOS" : "Other"
    end
    system_data["os_version"] = safe(() -> string(Sys.KERNEL))
    system_data["architecture"] = safe(() -> string(Sys.ARCH))
    system_data["cpu_cores"] = safe(() -> Sys.CPU_THREADS)
    system_data["julia_threads"] = safe(() -> Threads.nthreads())
    system_data["word_size"] = safe(() -> Sys.WORD_SIZE)
    system_data["machine"] = safe(() -> Sys.MACHINE)

    # CPU details (both keys fall back together, as they come from one query)
    try
        cpu_info = Sys.cpu_info()[1]
        system_data["cpu_name"] = cpu_info.model
        system_data["cpu_speed_mhz"] = cpu_info.speed
    catch
        system_data["cpu_name"] = "unknown"
        system_data["cpu_speed_mhz"] = "unknown"
    end

    # Categorize CPU vendor for easy analysis
    system_data["cpu_vendor"] = safe() do
        cpu_name_lower = lowercase(string(system_data["cpu_name"]))
        if contains(cpu_name_lower, "intel")
            "Intel"
        elseif contains(cpu_name_lower, "amd")
            "AMD"
        elseif contains(cpu_name_lower, "apple") || contains(cpu_name_lower, "m1") ||
               contains(cpu_name_lower, "m2") || contains(cpu_name_lower, "m3")
            "Apple"
        else
            "Other"
        end
    end

    # BLAS and linear algebra libraries
    system_data["blas_vendor"] = safe(() -> string(LinearAlgebra.BLAS.vendor()))
    # LAPACK.vendor() is not available on all Julia versions; the LAPACK vendor
    # usually matches the BLAS vendor, so fall back to that.
    system_data["lapack_vendor"] = safe(() -> string(LinearAlgebra.LAPACK.vendor()),
        get(system_data, "blas_vendor", "unknown"))
    system_data["blas_num_threads"] = safe(() -> LinearAlgebra.BLAS.get_num_threads())

    # LinearSolve-specific package availability
    system_data["mkl_available"] = safe(() -> LinearSolve.usemkl, false)
    system_data["mkl_used"] = safe(false) do
        system_data["mkl_available"] &&
            contains(lowercase(string(system_data["blas_vendor"])), "mkl")
    end

    system_data["apple_accelerate_available"] = safe(
        () -> LinearSolve.appleaccelerate_isavailable(), false)
    system_data["apple_accelerate_used"] = safe(false) do
        system_data["apple_accelerate_available"] &&
            contains(lowercase(string(system_data["blas_vendor"])), "accelerate")
    end

    # BLIS availability check - based on JLL packages
    system_data["blis_available"] = false
    system_data["blis_used"] = false
    system_data["blis_jll_loaded"] = false
    system_data["lapack_jll_loaded"] = false

    try
        # Check if BLIS_jll and LAPACK_jll are loaded
        system_data["blis_jll_loaded"] = haskey(Base.loaded_modules,
            Base.PkgId(Base.UUID("068f7417-6964-5086-9a5b-bc0c5b4f7fa6"), "BLIS_jll"))
        system_data["lapack_jll_loaded"] = haskey(Base.loaded_modules,
            Base.PkgId(Base.UUID("51474c39-65e3-53ba-86ba-03b1b862ec14"), "LAPACK_jll"))

        # BLIS is available if the JLL packages are loaded and BLISLUFactorization exists
        if (system_data["blis_jll_loaded"] || system_data["lapack_jll_loaded"]) &&
           isdefined(LinearSolve, :BLISLUFactorization) &&
           hasmethod(LinearSolve.BLISLUFactorization, ())
            system_data["blis_available"] = true
            # Check if BLIS is actually being used ("blis" in the BLAS vendor string)
            system_data["blis_used"] = contains(
                lowercase(string(system_data["blas_vendor"])), "blis")
        end
    catch
        # If there's any error checking the BLIS JLL packages, leave as false
    end

    # GPU information
    system_data["cuda_available"] = safe(is_cuda_available, false)
    system_data["metal_available"] = safe(is_metal_available, false)

    # Try to detect whether CUDA/Metal packages are actually loaded
    system_data["cuda_loaded"] = false
    system_data["metal_loaded"] = false
    try
        if system_data["cuda_available"]
            system_data["cuda_loaded"] = isdefined(Main, :CUDA) ||
                haskey(Base.loaded_modules,
                    Base.PkgId(Base.UUID("052768ef-5323-5732-b1bb-66c8b64840ba"), "CUDA"))
        end
        if system_data["metal_available"]
            system_data["metal_loaded"] = isdefined(Main, :Metal) ||
                haskey(Base.loaded_modules,
                    Base.PkgId(Base.UUID("dde4c033-4e86-420c-a63e-0dd931031962"), "Metal"))
        end
    catch
        # If we can't detect, leave as false
    end

    # Environment information (libdl_name may not exist in all Julia versions)
    system_data["libm"] = safe(() -> Base.libm_name)
    system_data["libdl"] = safe(() -> Base.libdl_name)

    # JLL package availability for enhanced library access
    system_data["blis_jll_available"] = LinearSolveAutotune.BLIS_JLL_AVAILABLE[]
    system_data["lapack_jll_available"] = LinearSolveAutotune.LAPACK_JLL_AVAILABLE[]

    # Memory information (if available)
    system_data["total_memory_gb"] = safe() do
        if Sys.islinux()
            meminfo = read(`cat /proc/meminfo`, String)
            mem_match = match(r"MemTotal:\s*(\d+)\s*kB", meminfo)
            mem_match === nothing ? "unknown" :
                round(parse(Int, mem_match.captures[1]) / 1024 / 1024, digits = 2)
        elseif Sys.isapple()
            round(parse(Int, read(`sysctl -n hw.memsize`, String)) / 1024^3, digits = 2)
        else
            "unknown"
        end
    end

    # Create DataFrame with a single row
    return DataFrame([system_data])
end

diff --git a/lib/LinearSolveAutotune/src/plotting.jl b/lib/LinearSolveAutotune/src/plotting.jl
new file mode 100644
index 000000000..bcb392c32
--- /dev/null
+++ b/lib/LinearSolveAutotune/src/plotting.jl
@@ -0,0 +1,145 @@
# Plotting functionality for benchmark results

"""
    create_benchmark_plots(df::DataFrame; title_base="LinearSolve.jl LU Factorization Benchmark")

Create separate plots for each element type showing GFLOPs vs matrix size for different algorithms.
Returns a dictionary of plots keyed by element type.
"""
function create_benchmark_plots(df::DataFrame; title_base = "LinearSolve.jl LU Factorization Benchmark")
    # Filter successful results
    successful_df = filter(row -> row.success, df)

    if nrow(successful_df) == 0
        @warn "No successful results to plot!"
+ return Dict{String, Any}() + end + + plots_dict = Dict{String, Any}() + + # Get unique element types + eltypes = unique(successful_df.eltype) + + for eltype in eltypes + @info "Creating plot for element type: $eltype" + + # Filter results for this element type + eltype_df = filter(row -> row.eltype == eltype, successful_df) + + if nrow(eltype_df) == 0 + continue + end + + # Get unique algorithms and sizes for this element type + algorithms = unique(eltype_df.algorithm) + sizes = sort(unique(eltype_df.size)) + + # Create the plot for this element type + title = "$title_base ($eltype)" + p = plot(title = title, + xlabel = "Matrix Size (N×N)", + ylabel = "Performance (GFLOPs)", + legend = :outertopright, + dpi = 300) + + # Plot each algorithm for this element type + for alg in algorithms + alg_df = filter(row -> row.algorithm == alg, eltype_df) + if nrow(alg_df) > 0 + # Sort by size for proper line plotting + sort!(alg_df, :size) + plot!(p, alg_df.size, alg_df.gflops, + label = alg, + marker = :circle, + linewidth = 2, + markersize = 4) + end + end + + plots_dict[eltype] = p + end + + return plots_dict +end + +""" + create_benchmark_plot(df::DataFrame; title="LinearSolve.jl LU Factorization Benchmark") + +Create a single plot showing GFLOPs vs matrix size for different algorithms. +Maintains backward compatibility - uses first element type if multiple exist. +""" +function create_benchmark_plot(df::DataFrame; title = "LinearSolve.jl LU Factorization Benchmark") + # For backward compatibility, create plots for all element types and return the first one + plots_dict = create_benchmark_plots(df; title_base = title) + + if isempty(plots_dict) + return nothing + end + + # Return the first plot for backward compatibility + return first(values(plots_dict)) +end + +""" + save_benchmark_plots(plots_dict::Dict, filename_base="autotune_benchmark") + +Save multiple benchmark plots (one per element type) in both PNG and PDF formats. 
+Returns a dictionary of saved filenames keyed by element type. +""" +function save_benchmark_plots(plots_dict::Dict, filename_base = "autotune_benchmark") + if isempty(plots_dict) + @warn "Cannot save plots: plots dictionary is empty" + return Dict{String, Tuple{String, String}}() + end + + saved_files = Dict{String, Tuple{String, String}}() + + for (eltype, plot_obj) in plots_dict + if plot_obj === nothing + @warn "Cannot save plot for $eltype: plot is nothing" + continue + end + + # Create filenames with element type suffix + eltype_safe = replace(string(eltype), "{" => "", "}" => "", "," => "_") + png_file = "$(filename_base)_$(eltype_safe).png" + pdf_file = "$(filename_base)_$(eltype_safe).pdf" + + try + savefig(plot_obj, png_file) + savefig(plot_obj, pdf_file) + @info "Plots for $eltype saved as $png_file and $pdf_file" + saved_files[eltype] = (png_file, pdf_file) + catch e + @warn "Failed to save plots for $eltype: $e" + end + end + + return saved_files +end + +""" + save_benchmark_plot(p, filename_base="autotune_benchmark") + +Save a single benchmark plot in both PNG and PDF formats. +Maintains backward compatibility. 
+""" +function save_benchmark_plot(p, filename_base = "autotune_benchmark") + if p === nothing + @warn "Cannot save plot: plot is nothing" + return nothing + end + + png_file = "$(filename_base).png" + pdf_file = "$(filename_base).pdf" + + try + savefig(p, png_file) + savefig(p, pdf_file) + @info "Plots saved as $png_file and $pdf_file" + return (png_file, pdf_file) + catch e + @warn "Failed to save plots: $e" + return nothing + end +end diff --git a/lib/LinearSolveAutotune/src/preferences.jl b/lib/LinearSolveAutotune/src/preferences.jl new file mode 100644 index 000000000..e2d8457fe --- /dev/null +++ b/lib/LinearSolveAutotune/src/preferences.jl @@ -0,0 +1,124 @@ +# Preferences management for storing optimal algorithms in LinearSolve.jl + +""" + set_algorithm_preferences(categories::Dict{String, String}) + +Set LinearSolve preferences based on the categorized benchmark results. +These preferences are stored in the main LinearSolve.jl package. +Handles element type-specific preferences with keys like "Float64_0-128". +""" +function set_algorithm_preferences(categories::Dict{String, String}) + @info "Setting LinearSolve preferences based on benchmark results..." + + for (category_key, algorithm) in categories + # Handle element type specific keys like "Float64_0-128" + # Convert to safe preference key format + pref_key = "best_algorithm_$(replace(category_key, "+" => "plus", "-" => "_"))" + + # Set preferences in LinearSolve.jl, not LinearSolveAutotune (force=true allows overwriting) + Preferences.set_preferences!(LinearSolve, pref_key => algorithm; force = true) + @info "Set preference $pref_key = $algorithm in LinearSolve.jl" + end + + # Set a timestamp for when these preferences were created + Preferences.set_preferences!(LinearSolve, "autotune_timestamp" => string(Dates.now()); force = true) + + @info "Preferences updated in LinearSolve.jl. You may need to restart Julia for changes to take effect." 
end

# Every preference key pattern autotune may write: one per (eltype, size-range)
# combination plus the legacy range-only keys.  Stored as
# `pattern => human-readable key` pairs so the string munging applied when a
# preference is written never has to be reversed lossily (naively replacing
# every "_" with "-" corrupts keys like "Float64_0_128" into "Float64-0-128"
# instead of the original "Float64_0-128").
const _PREFERENCE_PATTERNS = let
    eltypes = ("Float64", "Float32", "ComplexF64", "ComplexF32", "BigFloat")
    ranges = ("0-128" => "0_128", "128-256" => "128_256",
        "256-512" => "256_512", "512+" => "512plus")
    entries = Pair{String, String}[]
    for t in eltypes, (readable, pattern) in ranges
        push!(entries, "$(t)_$(pattern)" => "$(t)_$(readable)")
    end
    # Legacy patterns without an element type
    for (readable, pattern) in ranges
        push!(entries, pattern => readable)
    end
    entries
end

"""
    get_algorithm_preferences()

Get the current algorithm preferences from LinearSolve.jl.
Handles both legacy and element type-specific preferences.
"""
function get_algorithm_preferences()
    prefs = Dict{String, String}()

    # Preferences.jl has no way to enumerate stored keys, so probe every key
    # pattern autotune may have written (see `_PREFERENCE_PATTERNS`).
    for (pattern, readable_key) in _PREFERENCE_PATTERNS
        value = Preferences.load_preference(LinearSolve, "best_algorithm_$pattern", nothing)
        if value !== nothing
            prefs[readable_key] = value
        end
    end

    return prefs
end

"""
    clear_algorithm_preferences()

Clear all autotune-related preferences from LinearSolve.jl.
Handles both legacy and element type-specific preferences.
"""
function clear_algorithm_preferences()
    @info "Clearing LinearSolve autotune preferences..."

    # Clear every known preference pattern (shared with get_algorithm_preferences)
    for (pattern, _) in _PREFERENCE_PATTERNS
        pref_key = "best_algorithm_$pattern"
        # Check if the preference exists before trying to delete
        if Preferences.has_preference(LinearSolve, pref_key)
            Preferences.delete_preferences!(LinearSolve, pref_key; force = true)
            @info "Cleared preference: $pref_key"
        end
    end

    # Clear the timestamp written by `set_algorithm_preferences`
    if Preferences.has_preference(LinearSolve, "autotune_timestamp")
        Preferences.delete_preferences!(LinearSolve, "autotune_timestamp"; force = true)
    end

    @info "Preferences cleared from LinearSolve.jl."
end

"""
    show_current_preferences()

Display the current algorithm preferences from LinearSolve.jl in a readable format.
+""" +function show_current_preferences() + prefs = get_algorithm_preferences() + + if isempty(prefs) + println("No autotune preferences currently set in LinearSolve.jl.") + return + end + + println("Current LinearSolve.jl autotune preferences:") + println("="^50) + + for (range, algorithm) in sort(prefs) + println(" Size range $range: $algorithm") + end + + timestamp = Preferences.load_preference(LinearSolve, "autotune_timestamp", "unknown") + println(" Last updated: $timestamp") +end diff --git a/lib/LinearSolveAutotune/src/telemetry.jl b/lib/LinearSolveAutotune/src/telemetry.jl new file mode 100644 index 000000000..e4121616d --- /dev/null +++ b/lib/LinearSolveAutotune/src/telemetry.jl @@ -0,0 +1,426 @@ +# Telemetry functionality for sharing benchmark results + +""" + setup_github_authentication() + +Set up GitHub authentication for telemetry uploads. +Returns authentication object if successful, nothing if setup needed. +""" +function setup_github_authentication() + # Check if GITHUB_TOKEN environment variable exists + if haskey(ENV, "GITHUB_TOKEN") && !isempty(ENV["GITHUB_TOKEN"]) + return test_github_authentication(String(ENV["GITHUB_TOKEN"])) + end + + # No environment variable - provide setup instructions and get token + max_input_attempts = 3 + + for input_attempt in 1:max_input_attempts + println() + println("🚀 Help Improve LinearSolve.jl for Everyone!") + println("="^50) + println("Your benchmark results help the community by improving automatic") + println("algorithm selection across different hardware configurations.") + println() + println("📋 Quick GitHub Token Setup (takes 30 seconds):") + println() + println("1️⃣ Open: https://github.com/settings/tokens?type=beta") + println("2️⃣ Click 'Generate new token'") + println("3️⃣ Set:") + println(" • Name: 'LinearSolve Autotune'") + println(" • Expiration: 90 days") + println(" • Repository access: 'Public Repositories (read-only)'") + println(" • Permissions: Enable 'Issues: Write'") + println("4️⃣ Click 
'Generate token' and copy it") + println() + println("🔑 Paste your GitHub token here:") + println(" (If it shows julia> prompt, just paste the token there and press Enter)") + print("Token: ") + flush(stdout) + + # Get token input - handle both direct input and REPL interpretation + token = "" + try + sleep(0.1) # Small delay for I/O stability + input_line = String(strip(readline())) + + # If we got direct input, use it + if !isempty(input_line) + token = input_line + else + # Check if token was interpreted as Julia code and became a variable + # Look for common GitHub token patterns in global variables + println("🔍 Looking for token that may have been interpreted as Julia code...") + for name in names(Main, all=true) + if startswith(string(name), "github_pat_") || startswith(string(name), "ghp_") + try + value = getfield(Main, name) + if isa(value, AbstractString) && length(value) > 20 + println("✅ Found token variable: $(name)") + token = String(value) + break + end + catch + continue + end + end + end + + # If still no token, try one more direct input + if isempty(token) + println("💡 Please paste your token again (make sure to press Enter after):") + print("Token: ") + flush(stdout) + sleep(0.1) + token = String(strip(readline())) + end + end + + catch e + println("❌ Input error: $e") + println("💡 No worries - this sometimes happens with token input") + continue + end + + if !isempty(token) + # Clean and validate token format + clean_token = strip(replace(token, r"[\r\n\t ]+" => "")) + if length(clean_token) < 10 + println("❌ Token seems too short. 
Please check and try again.") + continue + end + + # Set environment variable + ENV["GITHUB_TOKEN"] = clean_token + + # Test authentication with multiple attempts (addressing the "third attempt works" issue) + auth_result = test_github_authentication(clean_token) + if auth_result !== nothing + return auth_result + end + + # If all authentication attempts failed, clean up and continue to next input attempt + delete!(ENV, "GITHUB_TOKEN") + end + + # Handle skip attempts + if input_attempt < max_input_attempts + println() + println("⏰ This really helps the LinearSolve.jl community!") + println(" Your hardware's benchmark data improves algorithm selection for everyone.") + println("🤝 Please try again - it only takes 30 seconds.") + end + end + + println() + println("📊 Continuing without telemetry. Results will be saved locally.") + println("💡 You can set GITHUB_TOKEN environment variable and restart Julia later.") + + return nothing +end + +""" + test_github_authentication(token::AbstractString) + +Test GitHub authentication with up to 3 attempts to handle connection warmup issues. +Returns authentication object if successful, nothing otherwise. 
+""" +function test_github_authentication(token::AbstractString) + max_auth_attempts = 3 + + println("🔍 Testing GitHub authentication...") + println("📏 Token length: $(length(token))") + flush(stdout) + + for auth_attempt in 1:max_auth_attempts + try + if auth_attempt == 1 + println("🌐 Establishing connection to GitHub API...") + elseif auth_attempt == 2 + println("🔄 Retrying connection (sometimes GitHub needs warmup)...") + else + println("🎯 Final authentication attempt...") + end + flush(stdout) + + # Add delay between attempts to handle timing issues + if auth_attempt > 1 + sleep(0.5) + end + + # Test authentication + auth_result = GitHub.authenticate(token) + + # If we get here, authentication worked + println("✅ Authentication successful - your results will help everyone!") + flush(stdout) + return auth_result + + catch e + println("❌ Attempt $auth_attempt failed: $(typeof(e))") + if auth_attempt < max_auth_attempts + println(" Retrying in a moment...") + else + println(" All authentication attempts failed.") + # Show safe preview of token for debugging + if length(token) > 8 + token_preview = token[1:4] * "..." * token[end-3:end] + println("🔍 Token preview: $token_preview") + end + println("💡 Please verify your token has 'Issues: Write' permission and try again.") + end + flush(stdout) + end + end + + return nothing +end + +""" + format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{String, String}) + +Format benchmark results as a markdown table suitable for GitHub issues. +""" +function format_results_for_github(df::DataFrame, system_info::Dict, categories::Dict{ + String, String}) + # Filter successful results + successful_df = filter(row -> row.success, df) + + if nrow(successful_df) == 0 + return "No successful benchmark results to report." 
+ end + + markdown_content = """ +## LinearSolve.jl Autotune Benchmark Results + +### System Information +$(format_system_info_markdown(system_info)) + +### Performance Summary by Size Range +$(format_categories_markdown(categories)) + +### Detailed Results +$(format_detailed_results_markdown(successful_df)) + +--- +*Generated automatically by LinearSolveAutotune.jl* +""" + + return markdown_content +end + +""" + format_system_info_markdown(system_info::Dict) + +Format system information as markdown. +""" +function format_system_info_markdown(system_info::Dict) + lines = String[] + push!(lines, "- **Julia Version**: $(system_info["julia_version"])") + push!(lines, "- **OS**: $(system_info["os"])") + push!(lines, "- **Architecture**: $(system_info["arch"])") + push!(lines, "- **CPU**: $(system_info["cpu_name"])") + push!(lines, "- **Cores**: $(system_info["num_cores"])") + push!(lines, "- **Threads**: $(system_info["num_threads"])") + push!(lines, "- **BLAS**: $(system_info["blas_vendor"])") + push!(lines, "- **MKL Available**: $(system_info["mkl_available"])") + push!(lines, "- **Apple Accelerate Available**: $(system_info["apple_accelerate_available"])") + push!(lines, "- **CUDA Available**: $(system_info["has_cuda"])") + push!(lines, "- **Metal Available**: $(system_info["has_metal"])") + + return join(lines, "\n") +end + +""" + format_categories_markdown(categories::Dict{String, String}) + +Format the categorized results as markdown, organized by element type. +""" +function format_categories_markdown(categories::Dict{String, String}) + if isempty(categories) + return "No category recommendations available." 
+ end + + lines = String[] + + # Group categories by element type + eltype_categories = Dict{String, Dict{String, String}}() + + for (key, algorithm) in categories + # Parse key like "Float64_0-128" -> eltype="Float64", range="0-128" + if contains(key, "_") + eltype, range = split(key, "_", limit=2) + if !haskey(eltype_categories, eltype) + eltype_categories[eltype] = Dict{String, String}() + end + eltype_categories[eltype][range] = algorithm + else + # Fallback for backward compatibility + if !haskey(eltype_categories, "Mixed") + eltype_categories["Mixed"] = Dict{String, String}() + end + eltype_categories["Mixed"][key] = algorithm + end + end + + # Format each element type + for (eltype, ranges) in sort(eltype_categories) + push!(lines, "#### Recommendations for $eltype") + push!(lines, "") + push!(lines, "| Size Range | Best Algorithm |") + push!(lines, "|------------|----------------|") + + for (range, algorithm) in sort(ranges) + push!(lines, "| $range | $algorithm |") + end + push!(lines, "") + end + + return join(lines, "\n") +end + +""" + format_detailed_results_markdown(df::DataFrame) + +Format detailed benchmark results as markdown tables, organized by element type. 
+""" +function format_detailed_results_markdown(df::DataFrame) + lines = String[] + + # Get unique element types + eltypes = unique(df.eltype) + + for eltype in eltypes + push!(lines, "#### Results for $eltype") + push!(lines, "") + + # Filter results for this element type + eltype_df = filter(row -> row.eltype == eltype, df) + + if nrow(eltype_df) == 0 + push!(lines, "No results for this element type.") + push!(lines, "") + continue + end + + # Create a summary table with average performance per algorithm for this element type + summary = combine(groupby(eltype_df, :algorithm), + :gflops => mean => :avg_gflops, + :gflops => std => :std_gflops, + nrow => :num_tests) + sort!(summary, :avg_gflops, rev = true) + + push!(lines, "| Algorithm | Avg GFLOPs | Std Dev | Tests |") + push!(lines, "|-----------|------------|---------|-------|") + + for row in eachrow(summary) + avg_str = @sprintf("%.2f", row.avg_gflops) + std_str = @sprintf("%.2f", row.std_gflops) + push!(lines, "| $(row.algorithm) | $avg_str | $std_str | $(row.num_tests) |") + end + + push!(lines, "") + end + + return join(lines, "\n") +end + +""" + upload_to_github(content::String, plot_files::Union{Nothing, Tuple, Dict}, auth, + results_df::DataFrame, system_info::Dict, categories::Dict) + +Create a GitHub issue with benchmark results for community data collection. +Requires a pre-authenticated GitHub.jl auth object. +""" +function upload_to_github(content::String, plot_files::Union{Nothing, Tuple, Dict}, auth, + results_df::DataFrame, system_info::Dict, categories::Dict) + + if auth === nothing + @info "⚠️ No GitHub authentication available. Saving results locally instead of uploading." 
+ # Save locally as fallback + fallback_file = "autotune_results_$(replace(string(Dates.now()), ":" => "-")).md" + open(fallback_file, "w") do f + write(f, content) + end + @info "📁 Results saved locally to $fallback_file" + return + end + + @info "📤 Creating GitHub issue with benchmark results for community data collection..." + + try + # Create issue with benchmark data + target_repo = "SciML/LinearSolve.jl" + issue_result = create_benchmark_issue(target_repo, content, auth, system_info) + + if issue_result !== nothing + @info "✅ Successfully created benchmark results issue: $(issue_result.html_url)" + @info "🔗 Your benchmark data has been shared with the LinearSolve.jl community!" + @info "💡 View all community benchmark data: https://github.com/SciML/LinearSolve.jl/issues?q=is%3Aissue+label%3Abenchmark-data" + else + error("Failed to create GitHub issue") + end + + catch e + @warn "❌ Failed to create GitHub issue: $e" + @info "💡 This could be due to network issues, repository permissions, or API limits." + + # Save locally as fallback + timestamp = replace(string(Dates.now()), ":" => "-") + fallback_file = "autotune_results_$(timestamp).md" + open(fallback_file, "w") do f + write(f, content) + end + @info "📁 Results saved locally to $fallback_file as backup" + end +end + +""" + create_benchmark_issue(target_repo, content, auth, system_info) + +Create a GitHub issue with benchmark results using GitHub.create_issue(). 
+""" +function create_benchmark_issue(target_repo, content, auth, system_info) + try + # Get repository object + repo_obj = GitHub.repo(target_repo; auth=auth) + + # Create issue title and body + cpu_name = get(system_info, "cpu_name", "unknown") + os_name = get(system_info, "os", "unknown") + timestamp = Dates.format(Dates.now(), "yyyy-mm-dd HH:MM") + + issue_title = "Benchmark Results: $cpu_name on $os_name ($timestamp)" + + issue_body = """ +# LinearSolve.jl Autotune Benchmark Results + +$content + +--- + +## System Summary +- **CPU:** $cpu_name +- **OS:** $os_name +- **Timestamp:** $timestamp + +🤖 *Generated automatically by LinearSolve.jl autotune system* +""" + + # Create the issue with labels + issue_result = GitHub.create_issue( + repo_obj, + title=issue_title, + body=issue_body, + auth=auth + ) + + @info "✅ Created benchmark results issue #$(issue_result.number)" + return issue_result + + catch e + @warn "Failed to create benchmark issue: $e" + return nothing + end +end + diff --git a/lib/LinearSolveAutotune/test/runtests.jl b/lib/LinearSolveAutotune/test/runtests.jl new file mode 100644 index 000000000..e77846cc2 --- /dev/null +++ b/lib/LinearSolveAutotune/test/runtests.jl @@ -0,0 +1,271 @@ +using Test +using LinearSolve +using LinearSolveAutotune +using DataFrames +using Random + +@testset "LinearSolveAutotune.jl Tests" begin + + @testset "Algorithm Detection" begin + cpu_algs, cpu_names = LinearSolveAutotune.get_available_algorithms() + @test !isempty(cpu_algs) + @test !isempty(cpu_names) + @test length(cpu_algs) == length(cpu_names) + + # Test that we have at least basic algorithms + @test "LUFactorization" in cpu_names + @test "GenericLUFactorization" in cpu_names + + gpu_algs, gpu_names = LinearSolveAutotune.get_gpu_algorithms() + @test length(gpu_algs) == length(gpu_names) + # GPU algorithms might be empty if no GPU packages loaded + end + + @testset "Element Type Compatibility Testing" begin + cpu_algs, cpu_names = 
LinearSolveAutotune.get_available_algorithms() + + # Test Float64 compatibility (should work with all algorithms) + compatible_algs, compatible_names = LinearSolveAutotune.filter_compatible_algorithms( + cpu_algs, cpu_names, Float64) + @test !isempty(compatible_algs) + @test length(compatible_algs) == length(compatible_names) + + # Test Float32 compatibility + compatible_algs_f32, compatible_names_f32 = LinearSolveAutotune.filter_compatible_algorithms( + cpu_algs, cpu_names, Float32) + @test !isempty(compatible_algs_f32) + + # Test ComplexF64 compatibility + compatible_algs_c64, compatible_names_c64 = LinearSolveAutotune.filter_compatible_algorithms( + cpu_algs, cpu_names, ComplexF64) + @test !isempty(compatible_algs_c64) + + # Test BigFloat compatibility - should exclude BLAS algorithms but include pure Julia ones + compatible_algs_bf, compatible_names_bf = LinearSolveAutotune.filter_compatible_algorithms( + cpu_algs, cpu_names, BigFloat) + @test !isempty(compatible_algs_bf) + # Should not include LUFactorization (BLAS-dependent) + @test !("LUFactorization" in compatible_names_bf) + # Should include GenericLUFactorization (pure Julia) + @test "GenericLUFactorization" in compatible_names_bf + # Should include SimpleLUFactorization (pure Julia) + @test "SimpleLUFactorization" in compatible_names_bf + # Should include RFLUFactorization if available (pure Julia) + if "RFLUFactorization" in cpu_names + @test "RFLUFactorization" in compatible_names_bf + end + + # Test individual algorithm compatibility + for (alg, name) in zip(cpu_algs[1:min(3, end)], cpu_names[1:min(3, end)]) + result = LinearSolveAutotune.test_algorithm_compatibility(alg, Float64) + @test isa(result, Bool) + end + end + + @testset "Benchmark Size Generation" begin + # Test small benchmark sizes + small_sizes = LinearSolveAutotune.get_benchmark_sizes(false) + @test !isempty(small_sizes) + @test minimum(small_sizes) >= 4 + @test maximum(small_sizes) <= 500 + + # Test large benchmark sizes + large_sizes 
= LinearSolveAutotune.get_benchmark_sizes(true) + @test !isempty(large_sizes) + @test minimum(large_sizes) >= 4 + @test maximum(large_sizes) >= 2000 + end + + @testset "Small Scale Benchmarking" begin + # Test with a very small benchmark to ensure functionality + cpu_algs, cpu_names = LinearSolveAutotune.get_available_algorithms() + + # Use only first 2 algorithms and small sizes for fast testing + test_algs = cpu_algs[1:min(2, end)] + test_names = cpu_names[1:min(2, end)] + test_sizes = [4, 8] # Very small sizes for fast testing + test_eltypes = (Float64,) # Single element type for speed + + results_df = LinearSolveAutotune.benchmark_algorithms( + test_sizes, test_algs, test_names, test_eltypes; + samples = 1, seconds = 0.1) + + @test isa(results_df, DataFrame) + @test nrow(results_df) > 0 + @test hasproperty(results_df, :size) + @test hasproperty(results_df, :algorithm) + @test hasproperty(results_df, :eltype) + @test hasproperty(results_df, :gflops) + @test hasproperty(results_df, :success) + @test hasproperty(results_df, :error) + + # Test that we have results for both sizes and element types + @test length(unique(results_df.size)) <= length(test_sizes) + @test all(eltype -> eltype in ["Float64"], unique(results_df.eltype)) + + # Check that successful results have positive GFLOPs + successful_results = filter(row -> row.success, results_df) + if nrow(successful_results) > 0 + @test all(gflops -> gflops > 0, successful_results.gflops) + end + end + + @testset "Result Categorization" begin + # Create mock results data for testing + mock_data = [ + (size = 50, algorithm = "TestAlg1", eltype = "Float64", gflops = 10.0, success = true, error = ""), + (size = 100, algorithm = "TestAlg1", eltype = "Float64", gflops = 12.0, success = true, error = ""), + (size = 200, algorithm = "TestAlg1", eltype = "Float64", gflops = 8.0, success = true, error = ""), + (size = 50, algorithm = "TestAlg2", eltype = "Float64", gflops = 8.0, success = true, error = ""), + (size = 100, 
algorithm = "TestAlg2", eltype = "Float64", gflops = 15.0, success = true, error = ""), + (size = 200, algorithm = "TestAlg2", eltype = "Float64", gflops = 14.0, success = true, error = ""), + (size = 50, algorithm = "TestAlg1", eltype = "Float32", gflops = 9.0, success = true, error = ""), + (size = 100, algorithm = "TestAlg1", eltype = "Float32", gflops = 11.0, success = true, error = ""), + ] + + test_df = DataFrame(mock_data) + categories = LinearSolveAutotune.categorize_results(test_df) + + @test isa(categories, Dict{String, String}) + @test !isempty(categories) + + # Check that categories are properly formatted with element types + for (key, value) in categories + @test contains(key, "_") # Should have element type prefix + @test !isempty(value) + end + end + + @testset "Plotting Functions" begin + # Create mock results for plotting tests + mock_data = [ + (size = 50, algorithm = "TestAlg1", eltype = "Float64", gflops = 10.0, success = true, error = ""), + (size = 100, algorithm = "TestAlg1", eltype = "Float64", gflops = 12.0, success = true, error = ""), + (size = 50, algorithm = "TestAlg2", eltype = "Float64", gflops = 8.0, success = true, error = ""), + (size = 100, algorithm = "TestAlg2", eltype = "Float64", gflops = 15.0, success = true, error = ""), + (size = 50, algorithm = "TestAlg1", eltype = "Float32", gflops = 9.0, success = true, error = ""), + (size = 100, algorithm = "TestAlg1", eltype = "Float32", gflops = 11.0, success = true, error = ""), + ] + + test_df = DataFrame(mock_data) + + # Test multi-element type plotting + plots_dict = LinearSolveAutotune.create_benchmark_plots(test_df) + @test isa(plots_dict, Dict) + @test !isempty(plots_dict) + @test haskey(plots_dict, "Float64") + @test haskey(plots_dict, "Float32") + + # Test backward compatibility plotting + single_plot = LinearSolveAutotune.create_benchmark_plot(test_df) + @test single_plot !== nothing + + # Test with empty data + empty_df = DataFrame(size = Int[], algorithm = String[], 
eltype = String[], + gflops = Float64[], success = Bool[], error = String[]) + empty_plots = LinearSolveAutotune.create_benchmark_plots(empty_df) + @test isa(empty_plots, Dict) + @test isempty(empty_plots) + end + + @testset "System Information" begin + system_info = LinearSolveAutotune.get_system_info() + @test isa(system_info, Dict) + + # Check required fields + required_fields = ["julia_version", "os", "arch", "cpu_name", "num_cores", + "num_threads", "blas_vendor", "has_cuda", "has_metal", + "mkl_available", "apple_accelerate_available"] + + for field in required_fields + @test haskey(system_info, field) + end + + # Check types + @test isa(system_info["julia_version"], String) + @test isa(system_info["num_cores"], Int) + @test isa(system_info["num_threads"], Int) + @test isa(system_info["has_cuda"], Bool) + @test isa(system_info["has_metal"], Bool) + end + + @testset "Preference Management" begin + # Test setting and getting preferences + test_categories = Dict{String, String}( + "Float64_0-128" => "TestAlg1", + "Float64_128-256" => "TestAlg2", + "Float32_0-128" => "TestAlg1" + ) + + # Clear any existing preferences first + LinearSolveAutotune.clear_algorithm_preferences() + + # Set test preferences + LinearSolveAutotune.set_algorithm_preferences(test_categories) + + # Get preferences back + retrieved_prefs = LinearSolveAutotune.get_algorithm_preferences() + @test isa(retrieved_prefs, Dict{String, String}) + @test !isempty(retrieved_prefs) + + # Verify we can retrieve what we set + for (key, value) in test_categories + @test haskey(retrieved_prefs, key) + @test retrieved_prefs[key] == value + end + + # Test clearing preferences + LinearSolveAutotune.clear_algorithm_preferences() + cleared_prefs = LinearSolveAutotune.get_algorithm_preferences() + @test isempty(cleared_prefs) + end + + @testset "Integration Test - Mini Autotune" begin + # Test the full autotune_setup function with minimal parameters + # This is an integration test with very small scale to ensure 
everything works together + + # Skip telemetry and use minimal settings for testing + result = LinearSolveAutotune.autotune_setup( + large_matrices = false, + telemetry = false, + make_plot = false, + set_preferences = false, + samples = 1, + seconds = 0.1, + eltypes = (Float64,) # Single element type for speed + ) + + @test isa(result, DataFrame) + @test nrow(result) > 0 + @test hasproperty(result, :size) + @test hasproperty(result, :algorithm) + @test hasproperty(result, :eltype) + @test hasproperty(result, :gflops) + @test hasproperty(result, :success) + + # Test with multiple element types + result_multi = LinearSolveAutotune.autotune_setup( + large_matrices = false, + telemetry = false, + make_plot = true, # Test plotting integration + set_preferences = false, + samples = 1, + seconds = 0.1, + eltypes = (Float64, Float32) + ) + + # Should return tuple of (DataFrame, Dict) when make_plot=true + @test isa(result_multi, Tuple) + @test length(result_multi) == 2 + @test isa(result_multi[1], DataFrame) + @test isa(result_multi[2], Dict) # Plots dictionary + + df, plots = result_multi + @test nrow(df) > 0 + @test !isempty(plots) + + # Check that we have results for multiple element types + eltypes_in_results = unique(df.eltype) + @test length(eltypes_in_results) >= 1 # At least one element type should work + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 53140926b..32dee2ed7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -35,6 +35,11 @@ if GROUP == "DefaultsLoading" @time @safetestset "Defaults Loading Tests" include("defaults_loading.jl") end +if GROUP == "LinearSolveAutotune" + Pkg.activate(joinpath(dirname(@__DIR__), "lib", GROUP)) + Pkg.test(GROUP, julia_args=["--check-bounds=auto", "--compiled-modules=yes", "--depwarn=yes"], force_latest_compatible_version=false, allow_reresolve=true) +end + if GROUP == "LinearSolveCUDA" Pkg.activate("gpu") Pkg.develop(PackageSpec(path = dirname(@__DIR__)))