Skip to content

Commit f15338f

Browse files
committed
Added L2 cache size property to OpenCL, HIP and mocked PyCUDA backends
1 parent 6e6e5fb commit f15338f

File tree

3 files changed

+7
-1
lines changed

3 files changed

+7
-1
lines changed

kernel_tuner/backends/hip.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
5959

6060
self.name = self.hipProps._name.decode('utf-8')
6161
self.max_threads = self.hipProps.maxThreadsPerBlock
62+
self.cache_size_L2 = self.hipProps.l2CacheSize
6263
self.device = device
6364
self.compiler_options = compiler_options or []
6465
self.iterations = iterations

kernel_tuner/backends/opencl.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def __init__(
4545
self.max_threads = self.ctx.devices[0].get_info(
4646
cl.device_info.MAX_WORK_GROUP_SIZE
4747
)
48+
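# TODO the L2 cache size request fails: cl.device_affinity_domain.L2_CACHE is a sub-device partitioning flag, not a device_info query key (cl.device_info.GLOBAL_MEM_CACHE_SIZE is probably the intended query -- confirm)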
49+
# self.cache_size_L2 = self.ctx.devices[0].get_info(
50+
# cl.device_affinity_domain.L2_CACHE
51+
# )
4852
self.compiler_options = compiler_options or []
4953

5054
# observer stuff

test/test_pycuda_mocked.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def setup_mock(drv):
1313
context = Mock()
1414
devprops = {'MAX_THREADS_PER_BLOCK': 1024,
1515
'COMPUTE_CAPABILITY_MAJOR': 5,
16-
'COMPUTE_CAPABILITY_MINOR': 5}
16+
'COMPUTE_CAPABILITY_MINOR': 5,
17+
'L2_CACHE_SIZE': 4096}
1718
context.return_value.get_device.return_value.get_attributes.return_value = devprops
1819
context.return_value.get_device.return_value.compute_capability.return_value = "55"
1920
drv.Device.return_value.retain_primary_context.return_value = context()

0 commit comments

Comments
 (0)