@@ -1398,7 +1398,7 @@ void test_cache_read1(void* _args, int32_t num_args)
13981398 };
13991399 for (int32_t i = 0; i < 32; i += 1) {
14001400 for (int32_t j = 0; j < 32; j += 1) {
1401- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]);
1401+ B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f );
14021402 };
14031403 };
14041404 for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 16; cache_ax0_0 += 1) {
@@ -1408,7 +1408,7 @@ void test_cache_read1(void* _args, int32_t num_args)
14081408 };
14091409 for (int32_t i = 0; i < 16; i += 1) {
14101410 for (int32_t j = 0; j < 16; j += 1) {
1411- C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]);
1411+ C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f );
14121412 };
14131413 };
14141414 cinn_buffer_free((void*)(0), _B);
@@ -1480,7 +1480,7 @@ void test_cache_read2(void* _args, int32_t num_args)
14801480 for (int32_t i = 0; i < 64; i += 1) {
14811481 for (int32_t j = 0; j < 32; j += 1) {
14821482 A_local_temp_buffer[((32 * i) + j)] = A[((32 * i) + j)];
1483- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((32 * i) + j)]);
1483+ B[((32 * i) + j)] = (A_local_temp_buffer[((32 * i) + j)] * 2.00000000f );
14841484 };
14851485 };
14861486 cinn_buffer_free((void*)(0), _B);
@@ -1553,7 +1553,7 @@ void test_cache_write1(void* _args, int32_t num_args)
15531553 float* C = ((float*)(_C->memory));
15541554 for (int32_t i = 0; i < 64; i += 1) {
15551555 for (int32_t j = 0; j < 32; j += 1) {
1556- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1556+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
15571557 };
15581558 };
15591559 for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1563,7 +1563,7 @@ void test_cache_write1(void* _args, int32_t num_args)
15631563 };
15641564 for (int32_t i = 0; i < 64; i += 1) {
15651565 for (int32_t j = 0; j < 32; j += 1) {
1566- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1566+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
15671567 };
15681568 };
15691569 for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) {
@@ -1637,7 +1637,7 @@ void test_cache_write2(void* _args, int32_t num_args)
16371637 float* B = ((float*)(_B->memory));
16381638 for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
16391639 for (int32_t cache_ax1 = 0; cache_ax1 < 32; cache_ax1 += 1) {
1640- B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (2.00000000f * A[((32 * cache_ax0) + cache_ax1)]);
1640+ B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (A[((32 * cache_ax0) + cache_ax1)] * 2.00000000f );
16411641 B[((32 * cache_ax0) + cache_ax1)] = B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)];
16421642 };
16431643 };
@@ -1713,7 +1713,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C)
17131713 };
17141714 for (int32_t i = 0; i < 32; i += 1) {
17151715 for (int32_t j = 0; j < 32; j += 1) {
1716- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]);
1716+ B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f );
17171717 };
17181718 __syncthreads();
17191719 };
@@ -1725,7 +1725,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C)
17251725 for (int32_t i = 0; i < 16; i += 1) {
17261726 __syncthreads();
17271727 for (int32_t j = 0; j < 16; j += 1) {
1728- C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]);
1728+ C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f );
17291729 };
17301730 };
17311731}
@@ -1794,7 +1794,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C)
17941794 float* B = _B_temp_buffer;
17951795 for (int32_t i = 0; i < 64; i += 1) {
17961796 for (int32_t j = 0; j < 32; j += 1) {
1797- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1797+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
17981798 };
17991799 };
18001800 for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1805,7 +1805,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C)
18051805 __syncthreads();
18061806 for (int32_t i = 0; i < 64; i += 1) {
18071807 for (int32_t j = 0; j < 32; j += 1) {
1808- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1808+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
18091809 };
18101810 };
18111811 __syncthreads();
@@ -1878,7 +1878,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C)
18781878 float* B = _B_temp_buffer;
18791879 for (int32_t i = 0; i < 64; i += 1) {
18801880 for (int32_t j = 0; j < 32; j += 1) {
1881- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1881+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
18821882 };
18831883 };
18841884 for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1889,7 +1889,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C)
18891889 };
18901890 for (int32_t i = 0; i < 64; i += 1) {
18911891 for (int32_t j = 0; j < 32; j += 1) {
1892- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1892+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
18931893 };
18941894 };
18951895 for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) {
@@ -2716,7 +2716,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
27162716 for (int32_t i = 0; i < 32; i += 1) {
27172717 for (int32_t j = 0; j < 32; j += 1) {
27182718 for (int32_t k = 0; k < 32; k += 1) {
2719- C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((32 * i) + ((1024 * j) + k))], 2.00000000f);
2719+ C[((1024 * i) + ((32 * j) + k))] = (( A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f);
27202720 };
27212721 };
27222722 };
@@ -2790,7 +2790,7 @@ void test_compute_inline2(void* _args, int32_t num_args)
27902790 for (int32_t i = 0; i < 32; i += 1) {
27912791 for (int32_t j = 0; j < 32; j += 1) {
27922792 for (int32_t k = 0; k < 32; k += 1) {
2793- C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f);
2793+ C[((1024 * i) + ((32 * j) + k))] = (( A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f);
27942794 };
27952795 };
27962796 };
@@ -2855,7 +2855,7 @@ void test_compute_inline3(const float* __restrict__ A, float* __restrict__ C)
28552855 for (int32_t i = 0; i < 32; i += 1) {
28562856 for (int32_t j = 0; j < 32; j += 1) {
28572857 for (int32_t k = 0; k < 32; k += 1) {
2858- C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((32 * i) + ((1024 * j) + k))]) );
2858+ C[((1024 * i) + ((32 * j) + k))] = (( A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f );
28592859 };
28602860 };
28612861 };
@@ -2917,7 +2917,7 @@ void test_compute_inline4(const float* __restrict__ A, float* __restrict__ C)
29172917 for (int32_t i = 0; i < 32; i += 1) {
29182918 for (int32_t j = 0; j < 32; j += 1) {
29192919 for (int32_t k = 0; k < 32; k += 1) {
2920- C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((1024 * i) + ((32 * j) + k))]) );
2920+ C[((1024 * i) + ((32 * j) + k))] = (( A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f );
29212921 };
29222922 };
29232923 };
@@ -2979,7 +2979,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
29792979 float* C = ((float*)(_C->memory));
29802980 for (int32_t i = 0; i < 32; i += 1) {
29812981 for (int32_t j = 0; j < 64; j += 1) {
2982- C[((32 * j) + i)] = fma (2.00000000f, A[((64 * i) + j)], 2.00000000f );
2982+ C[((32 * j) + i)] = (2.00000000f * (1.00000000f + A[((64 * i) + j)]) );
29832983 };
29842984 };
29852985 cinn_buffer_free((void*)(0), _B);
@@ -3047,7 +3047,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
30473047 for (int32_t i = 0; i < 32; i += 1) {
30483048 for (int32_t j = 0; j < 32; j += 1) {
30493049 for (int32_t k = 0; k < 32; k += 1) {
3050- C[((32 * i) + ((1024 * j) + k))] = fma (2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f );
3050+ C[((32 * i) + ((1024 * j) + k))] = (2.00000000f * (1.00000000f + A[((1024 * i) + ((32 * j) + k))]) );
30513051 };
30523052 };
30533053 };
@@ -3125,7 +3125,7 @@ void test_copytransform1(void* _args, int32_t num_args)
31253125 for (int32_t j = 0; j < 8; j += 1) {
31263126 for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
31273127 for (int32_t k = 0; k < 32; k += 1) {
3128- B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (1.00000000f + A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))]);
3128+ B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] + 1.00000000f );
31293129 };
31303130 };
31313131 };
@@ -3136,7 +3136,7 @@ void test_copytransform1(void* _args, int32_t num_args)
31363136 for (int32_t j = 0; j < 8; j += 1) {
31373137 for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
31383138 for (int32_t k = 0; k < 32; k += 1) {
3139- C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (2.00000000f * B[((256 * i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))]);
3139+ C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (B[((256 * i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))] * 2.00000000f );
31403140 };
31413141 };
31423142 };
@@ -3214,7 +3214,7 @@ void test_copytransform2(void* _args, int32_t num_args)
32143214 for (int32_t i_0 = 0; i_0 < 8; i_0 += 1) {
32153215 for (int32_t j = 0; j < 64; j += 1) {
32163216 for (int32_t k = 0; k < 128; k += 1) {
3217- B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (1.00000000f + A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))]);
3217+ B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] + 1.00000000f );
32183218 };
32193219 };
32203220 };
@@ -3224,7 +3224,7 @@ void test_copytransform2(void* _args, int32_t num_args)
32243224 for (int32_t j = 0; j < 8; j += 1) {
32253225 for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
32263226 for (int32_t k = 0; k < 128; k += 1) {
3227- C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (2.00000000f * B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))]);
3227+ C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))] * 2.00000000f );
32283228 };
32293229 };
32303230 };
0 commit comments