@@ -19,8 +19,12 @@ module Foreign.CUDA.Analysis.Device (
1919
2020#include "cbits/stubs.h"
2121
22+ import qualified Data.Set as Set
23+ import Data.Set (Set )
2224import Data.Int
25+ import Data.IORef
2326import Text.Show.Describe
27+ import System.IO.Unsafe
2428
2529import Debug.Trace
2630
@@ -486,7 +490,30 @@ deviceResources = resources . computeCapability
486490 -- However, it should be OK because all library functions run in IO, so it
487491 -- is likely the user code is as well.
488492 --
489- _ -> trace warning $ resources (Compute 6 0 )
490- where warning = unlines [ " *** Warning: Unknown CUDA device compute capability: " ++ show compute
491- , " *** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]
492-
493+ _ -> case warningForCC compute of
494+ Just warning -> trace warning defaultResources
495+ Nothing -> defaultResources
496+
497+ defaultResources = resources (Compute 6 0 )
498+
499+ -- All this logic is to ensure the warning is only shown once per unknown
500+ -- compute capability. This sounds not worth it, but in practice, it is:
501+ -- empirically, an unknown compute capability often leads to /screenfuls/
502+ -- of warnings in accelerate-llvm-ptx otherwise.
503+ {-# NOINLINE warningForCC #-}
504+ warningForCC :: Compute -> Maybe String
505+ warningForCC compute = unsafePerformIO $ do
506+ unseen <- atomicModifyIORef' warningShown $ \ seen ->
507+ -- This is just one tree traversal; lookup-insert would be two traversals.
508+ let seen' = Set. insert compute seen
509+ in (seen', Set. size seen' > Set. size seen)
510+ return $ if unseen
511+ then Just $ unlines
512+ [ " *** Warning: Unknown CUDA device compute capability: " ++ show compute
513+ , " *** Please submit a bug report at https://github.com/tmcdonell/cuda/issues"
514+ , " *** (This warning will only be shown once for this compute capability)" ]
515+ else Nothing
516+
517+ {-# NOINLINE warningShown #-}
518+ warningShown :: IORef (Set Compute )
519+ warningShown = unsafePerformIO $ newIORef mempty
0 commit comments