@@ -557,14 +557,20 @@ end
557557 ldiv (F, B)
558558
559559"""
560- LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)
560+ LinearAlgebra.peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
561561
562562`peakflops` computes the peak flop rate of the computer by using
563563[`gemm!`](@ref LinearAlgebra.BLAS.gemm!), in double precision unless `eltype` says otherwise. By default, if no arguments are specified, it
564- multiplies a matrix of size `n x n`, where `n = 2000 `. If the underlying BLAS is using
564+ multiplies two `Float64` matrices of size `n x n`, where `n = 4096`. If the underlying BLAS is using
565565multiple threads, higher flop rates are realized. The number of BLAS threads can be set with
566566[`BLAS.set_num_threads(n)`](@ref).
567567
568+ If the keyword argument `eltype` is provided, `peakflops` will construct matrices with elements
569+ of type `eltype` for calculating the peak flop rate.
570+
571+ By default, `peakflops` will use the best timing from 3 trials. If the `ntrials` keyword argument
572+ is provided, `peakflops` will use that many trials for picking the best timing.
573+
568574If the keyword argument `parallel` is set to `true`, `peakflops` is run in parallel on all
569575the worker processors. The flop rate of the entire parallel computer is returned. When
570576running in parallel, only 1 BLAS thread is used. The argument `n` still refers to the size
@@ -574,19 +580,21 @@ of the problem that is solved on each processor.
574580 This function requires at least Julia 1.1. In Julia 1.0 it is available from
575581 the standard library `InteractiveUtils`.
576582"""
function peakflops(n::Integer=4096; eltype::DataType=Float64, ntrials::Integer=3, parallel::Bool=false)
    # Estimate the flop rate from timing an `n x n` matrix product. The best
    # (smallest) time over `ntrials` runs is used, and 2n^3 is taken as the
    # operation count of the product. With `parallel=true` the measurement is
    # delegated to every Distributed worker and the per-worker rates are summed.

    # Fail early with a clear message; otherwise `ntrials <= 0` would only
    # surface later as an opaque "reducing over an empty collection" error.
    ntrials >= 1 || throw(ArgumentError("ntrials must be at least 1, got $ntrials"))

    if parallel
        let Distributed = Base.require(Base.PkgId(
                Base.UUID((0x8ba89e20_285c_5b6f, 0x9357_94700520ee1b)), "Distributed"))
            # `Distributed` may have been loaded by the `require` call just above,
            # so its methods can be newer than this function's world age.
            nw = Base.invokelatest(Distributed.nworkers)
            # Forward the keyword arguments so every worker benchmarks the same
            # element type and number of trials the caller asked for.
            rates = Base.invokelatest(Distributed.pmap,
                m -> peakflops(m; eltype=eltype, ntrials=ntrials), fill(n, nw))
            return sum(rates)
        end
    end

    # Serial measurement. It is only reached when `parallel` is false, so no
    # local benchmark is run just to be discarded in parallel mode.
    best = Inf
    for _ in 1:ntrials
        a = ones(eltype, n, n)
        elapsed = @elapsed a2 = a * a
        # Sanity check of the product: each entry of ones(n,n)^2 equals n.
        @assert a2[1, 1] == n
        best = min(best, elapsed)
    end
    return 2 * Float64(n)^3 / best
end
592600
0 commit comments