6666===
6767
6868Using LLVM's PGO implementation for GPUs, profile data can augment the info
69- reported by kernel-info. In particular, kernel-info can estimate of the number
70- of floating point operations executed.
69+ reported by kernel-info. In particular, kernel-info can estimate the number of
70+ floating point operations executed.
7171
7272For example, the following computes 2\ :sup:`4`\ , so we expect 4 fmul
7373instructions to execute at run time:
@@ -94,16 +94,16 @@ instructions to execute at run time:
9494 }
9595
9696 $ clang -O1 -g -fopenmp --offload-arch=native test.c -o test \
97- -fprofile-generate -fprofile-generate-gpu
97+ -fprofile-generate
9898
9999 $ LLVM_PROFILE_FILE=test.profraw ./test 2 4
100100 16.000000
101101
102102 $ llvm-profdata merge -output=test.profdata *.profraw
103103
104104 $ clang -O1 -g -fopenmp --offload-arch=native test.c -foffload-lto \
105- -Rpass=kernel-info -fprofile-use-gpu =test.profdata | \
105+ -Rpass=kernel-info -fprofile-use=test.profdata | \
106106 grep "test.c:.*Floating\|double"
107- test.c:13:0: in artificial function '__omp_offloading_35_126b72c_main_l13 ', FloatingPointOpProfileCount = 0
107+ test.c:13:0: in artificial function '__omp_offloading_34_1bc8484_main_l13', FloatingPointOpProfileCount = 0
108108 test.c:7:9: in function 'test', double 'fmul' ('%9') executed 4 times
109109 test.c:4:0: in function 'test', FloatingPointOpProfileCount = 4
0 commit comments