Skip to content

Commit 26083f0

Browse files
committed
Only show unknown-compute-capability warning once
1 parent d2488ee commit 26083f0

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

cuda.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ Library
149149
build-depends:
150150
base >= 4.7 && < 5
151151
, bytestring >= 0.10.4
152+
, containers
152153
, filepath >= 1.0
153154
, template-haskell
154155
, uuid-types >= 1.0

src/Foreign/CUDA/Analysis/Device.chs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@ module Foreign.CUDA.Analysis.Device (
1919

2020
#include "cbits/stubs.h"
2121

22+
import qualified Data.Set as Set
23+
import Data.Set (Set)
2224
import Data.Int
25+
import Data.IORef
2326
import Text.Show.Describe
27+
import System.IO.Unsafe
2428

2529
import Debug.Trace
2630

@@ -486,7 +490,30 @@ deviceResources = resources . computeCapability
486490
-- However, it should be OK because all library functions run in IO, so it
487491
-- is likely the user code is as well.
488492
--
489-
_ -> trace warning $ resources (Compute 6 0)
490-
where warning = unlines [ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
491-
, "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues" ]
492-
493+
_ -> case warningForCC compute of
494+
Just warning -> trace warning defaultResources
495+
Nothing -> defaultResources
496+
497+
defaultResources = resources (Compute 6 0)
498+
499+
-- All this logic is to ensure the warning is only shown once per unknown
500+
-- compute capability. This sounds not worth it, but in practice, it is:
501+
-- empirically, an unknown compute capability often leads to /screenfuls/
502+
-- of warnings in accelerate-llvm-ptx otherwise.
503+
{-# NOINLINE warningForCC #-}
504+
warningForCC :: Compute -> Maybe String
505+
warningForCC compute = unsafePerformIO $ do
506+
unseen <- atomicModifyIORef' warningShown $ \seen ->
507+
-- This is just one tree traversal; lookup-insert would be two traversals.
508+
let seen' = Set.insert compute seen
509+
in (seen', Set.size seen' > Set.size seen)
510+
return $ if unseen
511+
then Just $ unlines
512+
[ "*** Warning: Unknown CUDA device compute capability: " ++ show compute
513+
, "*** Please submit a bug report at https://github.com/tmcdonell/cuda/issues"
514+
, "*** (This warning will only be shown once for this compute capability)" ]
515+
else Nothing
516+
517+
{-# NOINLINE warningShown #-}
518+
warningShown :: IORef (Set Compute)
519+
warningShown = unsafePerformIO $ newIORef mempty

0 commit comments

Comments
 (0)