; 32 storage locations is sufficient for all current-generation NVIDIA GPUs
; (one slot per warp at the maximum of 1024 threads / 32 lanes per block).
; Per-type scratch arrays in addrspace(3) (shared memory); each array is
; naturally aligned for its element type. `poison` initializer: contents are
; meaningless until a group operation writes them.
@__clc__group_scratch_i1 = internal addrspace(3) global [32 x i1] poison, align 1
@__clc__group_scratch_i8 = internal addrspace(3) global [32 x i8] poison, align 1
@__clc__group_scratch_i16 = internal addrspace(3) global [32 x i16] poison, align 2
@__clc__group_scratch_i32 = internal addrspace(3) global [32 x i32] poison, align 4
@__clc__group_scratch_i64 = internal addrspace(3) global [32 x i64] poison, align 8
@__clc__group_scratch_i128 = internal addrspace(3) global [32 x i128] poison, align 8

; Returns the shared-memory scratch area used for `bool` group operations
; (the 32 x i1 array).
define ptr addrspace(3) @__clc__get_group_scratch_bool() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i1
}

; Returns the shared-memory scratch area used for 8-bit integer group
; operations (the 32 x i8 array).
define ptr addrspace(3) @__clc__get_group_scratch_char() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i8
}

; Returns the shared-memory scratch area used for 16-bit integer group
; operations (the 32 x i16 array).
define ptr addrspace(3) @__clc__get_group_scratch_short() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i16
}

; Returns the shared-memory scratch area used for 32-bit integer group
; operations (the 32 x i32 array).
define ptr addrspace(3) @__clc__get_group_scratch_int() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}

; Returns the shared-memory scratch area used for 64-bit integer group
; operations (the 32 x i64 array).
define ptr addrspace(3) @__clc__get_group_scratch_long() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}

; Returns scratch storage for `half` group operations. Reuses the 16-bit
; integer array: with opaque pointers the element type is untyped storage,
; and i16 has the same size and alignment as half.
define ptr addrspace(3) @__clc__get_group_scratch_half() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i16
}

; Returns scratch storage for `float` group operations. Reuses the 32-bit
; integer array (same size and alignment as float).
define ptr addrspace(3) @__clc__get_group_scratch_float() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}

; Returns scratch storage for `double` group operations. Reuses the 64-bit
; integer array (same size and alignment as double).
define ptr addrspace(3) @__clc__get_group_scratch_double() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}

; Returns scratch storage for complex-half ({half, half}, 32 bits total)
; group operations. With opaque pointers no dedicated struct type is needed;
; the 32-bit integer array provides storage of the right size and alignment.
define ptr addrspace(3) @__clc__get_group_scratch_complex_half() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}

; Returns scratch storage for complex-float ({float, float}, 64 bits total)
; group operations; backed by the 64-bit integer array.
define ptr addrspace(3) @__clc__get_group_scratch_complex_float() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}

; Returns scratch storage for complex-double ({double, double}, 128 bits
; total) group operations; backed by the 128-bit integer array.
define ptr addrspace(3) @__clc__get_group_scratch_complex_double() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i128
}