@@ -1398,7 +1398,7 @@ void test_cache_read1(void* _args, int32_t num_args)
1398
1398
};
1399
1399
for (int32_t i = 0; i < 32; i += 1) {
1400
1400
for (int32_t j = 0; j < 32; j += 1) {
1401
- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]);
1401
+ B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f );
1402
1402
};
1403
1403
};
1404
1404
for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 16; cache_ax0_0 += 1) {
@@ -1408,7 +1408,7 @@ void test_cache_read1(void* _args, int32_t num_args)
1408
1408
};
1409
1409
for (int32_t i = 0; i < 16; i += 1) {
1410
1410
for (int32_t j = 0; j < 16; j += 1) {
1411
- C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]);
1411
+ C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f );
1412
1412
};
1413
1413
};
1414
1414
cinn_buffer_free((void*)(0), _B);
@@ -1480,7 +1480,7 @@ void test_cache_read2(void* _args, int32_t num_args)
1480
1480
for (int32_t i = 0; i < 64; i += 1) {
1481
1481
for (int32_t j = 0; j < 32; j += 1) {
1482
1482
A_local_temp_buffer[((32 * i) + j)] = A[((32 * i) + j)];
1483
- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((32 * i) + j)]);
1483
+ B[((32 * i) + j)] = (A_local_temp_buffer[((32 * i) + j)] * 2.00000000f );
1484
1484
};
1485
1485
};
1486
1486
cinn_buffer_free((void*)(0), _B);
@@ -1553,7 +1553,7 @@ void test_cache_write1(void* _args, int32_t num_args)
1553
1553
float* C = ((float*)(_C->memory));
1554
1554
for (int32_t i = 0; i < 64; i += 1) {
1555
1555
for (int32_t j = 0; j < 32; j += 1) {
1556
- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1556
+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
1557
1557
};
1558
1558
};
1559
1559
for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1563,7 +1563,7 @@ void test_cache_write1(void* _args, int32_t num_args)
1563
1563
};
1564
1564
for (int32_t i = 0; i < 64; i += 1) {
1565
1565
for (int32_t j = 0; j < 32; j += 1) {
1566
- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1566
+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
1567
1567
};
1568
1568
};
1569
1569
for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) {
@@ -1637,7 +1637,7 @@ void test_cache_write2(void* _args, int32_t num_args)
1637
1637
float* B = ((float*)(_B->memory));
1638
1638
for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
1639
1639
for (int32_t cache_ax1 = 0; cache_ax1 < 32; cache_ax1 += 1) {
1640
- B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (2.00000000f * A[((32 * cache_ax0) + cache_ax1)]);
1640
+ B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)] = (A[((32 * cache_ax0) + cache_ax1)] * 2.00000000f );
1641
1641
B[((32 * cache_ax0) + cache_ax1)] = B_local_temp_buffer[((32 * cache_ax0) + cache_ax1)];
1642
1642
};
1643
1643
};
@@ -1713,7 +1713,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C)
1713
1713
};
1714
1714
for (int32_t i = 0; i < 32; i += 1) {
1715
1715
for (int32_t j = 0; j < 32; j += 1) {
1716
- B[((32 * i) + j)] = (2.00000000f * A_local_temp_buffer[((64 * i) + j)]);
1716
+ B[((32 * i) + j)] = (A_local_temp_buffer[((64 * i) + j)] * 2.00000000f );
1717
1717
};
1718
1718
__syncthreads();
1719
1719
};
@@ -1725,7 +1725,7 @@ void test_cache_read3(const float* __restrict__ A, float* __restrict__ C)
1725
1725
for (int32_t i = 0; i < 16; i += 1) {
1726
1726
__syncthreads();
1727
1727
for (int32_t j = 0; j < 16; j += 1) {
1728
- C[((16 * i) + j)] = (1.00000000f + B_local_temp_buffer[((32 * i) + j)]);
1728
+ C[((16 * i) + j)] = (B_local_temp_buffer[((32 * i) + j)] + 1.00000000f );
1729
1729
};
1730
1730
};
1731
1731
}
@@ -1794,7 +1794,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C)
1794
1794
float* B = _B_temp_buffer;
1795
1795
for (int32_t i = 0; i < 64; i += 1) {
1796
1796
for (int32_t j = 0; j < 32; j += 1) {
1797
- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1797
+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
1798
1798
};
1799
1799
};
1800
1800
for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1805,7 +1805,7 @@ void test_cache_write3(const float* __restrict__ A, float* __restrict__ C)
1805
1805
__syncthreads();
1806
1806
for (int32_t i = 0; i < 64; i += 1) {
1807
1807
for (int32_t j = 0; j < 32; j += 1) {
1808
- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1808
+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
1809
1809
};
1810
1810
};
1811
1811
__syncthreads();
@@ -1878,7 +1878,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C)
1878
1878
float* B = _B_temp_buffer;
1879
1879
for (int32_t i = 0; i < 64; i += 1) {
1880
1880
for (int32_t j = 0; j < 32; j += 1) {
1881
- B_local_temp_buffer[((32 * i) + j)] = (2.00000000f * A[((32 * i) + j)]);
1881
+ B_local_temp_buffer[((32 * i) + j)] = (A[((32 * i) + j)] * 2.00000000f );
1882
1882
};
1883
1883
};
1884
1884
for (int32_t cache_ax0 = 0; cache_ax0 < 64; cache_ax0 += 1) {
@@ -1889,7 +1889,7 @@ void test_sync_threads(const float* __restrict__ A, float* __restrict__ C)
1889
1889
};
1890
1890
for (int32_t i = 0; i < 64; i += 1) {
1891
1891
for (int32_t j = 0; j < 32; j += 1) {
1892
- C_local_temp_buffer[((32 * i) + j)] = (1.00000000f + B[((32 * i) + j)]);
1892
+ C_local_temp_buffer[((32 * i) + j)] = (B[((32 * i) + j)] + 1.00000000f );
1893
1893
};
1894
1894
};
1895
1895
for (int32_t cache_ax0_0 = 0; cache_ax0_0 < 64; cache_ax0_0 += 1) {
@@ -2716,7 +2716,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
2716
2716
for (int32_t i = 0; i < 32; i += 1) {
2717
2717
for (int32_t j = 0; j < 32; j += 1) {
2718
2718
for (int32_t k = 0; k < 32; k += 1) {
2719
- C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((32 * i) + ((1024 * j) + k))], 2.00000000f);
2719
+ C[((1024 * i) + ((32 * j) + k))] = (( A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f);
2720
2720
};
2721
2721
};
2722
2722
};
@@ -2790,7 +2790,7 @@ void test_compute_inline2(void* _args, int32_t num_args)
2790
2790
for (int32_t i = 0; i < 32; i += 1) {
2791
2791
for (int32_t j = 0; j < 32; j += 1) {
2792
2792
for (int32_t k = 0; k < 32; k += 1) {
2793
- C[((1024 * i) + ((32 * j) + k))] = fma(2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f);
2793
+ C[((1024 * i) + ((32 * j) + k))] = (( A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f);
2794
2794
};
2795
2795
};
2796
2796
};
@@ -2855,7 +2855,7 @@ void test_compute_inline3(const float* __restrict__ A, float* __restrict__ C)
2855
2855
for (int32_t i = 0; i < 32; i += 1) {
2856
2856
for (int32_t j = 0; j < 32; j += 1) {
2857
2857
for (int32_t k = 0; k < 32; k += 1) {
2858
- C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((32 * i) + ((1024 * j) + k))]) );
2858
+ C[((1024 * i) + ((32 * j) + k))] = (( A[((32 * i) + ((1024 * j) + k))] + 1.00000000f) * 2.00000000f );
2859
2859
};
2860
2860
};
2861
2861
};
@@ -2917,7 +2917,7 @@ void test_compute_inline4(const float* __restrict__ A, float* __restrict__ C)
2917
2917
for (int32_t i = 0; i < 32; i += 1) {
2918
2918
for (int32_t j = 0; j < 32; j += 1) {
2919
2919
for (int32_t k = 0; k < 32; k += 1) {
2920
- C[((1024 * i) + ((32 * j) + k))] = (2.00000000f + (2.00000000f * A[((1024 * i) + ((32 * j) + k))]) );
2920
+ C[((1024 * i) + ((32 * j) + k))] = (( A[((1024 * i) + ((32 * j) + k))] + 1.00000000f) * 2.00000000f );
2921
2921
};
2922
2922
};
2923
2923
};
@@ -2979,7 +2979,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
2979
2979
float* C = ((float*)(_C->memory));
2980
2980
for (int32_t i = 0; i < 32; i += 1) {
2981
2981
for (int32_t j = 0; j < 64; j += 1) {
2982
- C[((32 * j) + i)] = fma (2.00000000f, A[((64 * i) + j)], 2.00000000f );
2982
+ C[((32 * j) + i)] = (2.00000000f * (1.00000000f + A[((64 * i) + j)]) );
2983
2983
};
2984
2984
};
2985
2985
cinn_buffer_free((void*)(0), _B);
@@ -3047,7 +3047,7 @@ void test_compute_inline1(void* _args, int32_t num_args)
3047
3047
for (int32_t i = 0; i < 32; i += 1) {
3048
3048
for (int32_t j = 0; j < 32; j += 1) {
3049
3049
for (int32_t k = 0; k < 32; k += 1) {
3050
- C[((32 * i) + ((1024 * j) + k))] = fma (2.00000000f, A[((1024 * i) + ((32 * j) + k))], 2.00000000f );
3050
+ C[((32 * i) + ((1024 * j) + k))] = (2.00000000f * (1.00000000f + A[((1024 * i) + ((32 * j) + k))]) );
3051
3051
};
3052
3052
};
3053
3053
};
@@ -3125,7 +3125,7 @@ void test_copytransform1(void* _args, int32_t num_args)
3125
3125
for (int32_t j = 0; j < 8; j += 1) {
3126
3126
for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
3127
3127
for (int32_t k = 0; k < 32; k += 1) {
3128
- B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (1.00000000f + A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))]);
3128
+ B[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (A[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] + 1.00000000f );
3129
3129
};
3130
3130
};
3131
3131
};
@@ -3136,7 +3136,7 @@ void test_copytransform1(void* _args, int32_t num_args)
3136
3136
for (int32_t j = 0; j < 8; j += 1) {
3137
3137
for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
3138
3138
for (int32_t k = 0; k < 32; k += 1) {
3139
- C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (2.00000000f * B[((256 * i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))]);
3139
+ C[((8192 * i) + ((1024 * i_0) + ((128 * j) + ((32 * j_0) + k))))] = (B[((256 * i) + ((32 * i_0) + ((4096 * j) + ((1024 * j_0) + k))))] * 2.00000000f );
3140
3140
};
3141
3141
};
3142
3142
};
@@ -3214,7 +3214,7 @@ void test_copytransform2(void* _args, int32_t num_args)
3214
3214
for (int32_t i_0 = 0; i_0 < 8; i_0 += 1) {
3215
3215
for (int32_t j = 0; j < 64; j += 1) {
3216
3216
for (int32_t k = 0; k < 128; k += 1) {
3217
- B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (1.00000000f + A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))]);
3217
+ B[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] = (A[((65536 * i) + ((8192 * i_0) + ((128 * j) + k)))] + 1.00000000f );
3218
3218
};
3219
3219
};
3220
3220
};
@@ -3224,7 +3224,7 @@ void test_copytransform2(void* _args, int32_t num_args)
3224
3224
for (int32_t j = 0; j < 8; j += 1) {
3225
3225
for (int32_t j_0 = 0; j_0 < 4; j_0 += 1) {
3226
3226
for (int32_t k = 0; k < 128; k += 1) {
3227
- C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (2.00000000f * B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))]);
3227
+ C[((32768 * i) + ((4096 * i_0) + ((512 * j) + ((128 * j_0) + k))))] = (B[((65536 * i) + ((8192 * i_0) + ((512 * j) + ((128 * j_0) + k))))] * 2.00000000f );
3228
3228
};
3229
3229
};
3230
3230
};
0 commit comments