@@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
6078
6078
}
6079
6079
6080
6080
6081
- /* Function vect_permute_store_chain.
6082
-
6083
- Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
6084
- a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
6085
- the data correctly for the stores. Return the final references for stores
6086
- in RESULT_CHAIN.
6087
-
6088
- E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
6089
- The input is 4 vectors each containing 8 elements. We assign a number to
6090
- each element, the input sequence is:
6091
-
6092
- 1st vec: 0 1 2 3 4 5 6 7
6093
- 2nd vec: 8 9 10 11 12 13 14 15
6094
- 3rd vec: 16 17 18 19 20 21 22 23
6095
- 4th vec: 24 25 26 27 28 29 30 31
6096
-
6097
- The output sequence should be:
6098
-
6099
- 1st vec: 0 8 16 24 1 9 17 25
6100
- 2nd vec: 2 10 18 26 3 11 19 27
6101
- 3rd vec: 4 12 20 28 5 13 21 29
6102
- 4th vec: 6 14 22 30 7 15 23 31
6103
-
6104
- i.e., we interleave the contents of the four vectors in their order.
6105
-
6106
- We use interleave_high/low instructions to create such output. The input of
6107
- each interleave_high/low operation is two vectors:
6108
- 1st vec 2nd vec
6109
- 0 1 2 3 4 5 6 7
6110
- the even elements of the result vector are obtained left-to-right from the
6111
- high/low elements of the first vector. The odd elements of the result are
6112
- obtained left-to-right from the high/low elements of the second vector.
6113
- The output of interleave_high will be: 0 4 1 5
6114
- and of interleave_low: 2 6 3 7
6115
-
6116
-
6117
- The permutation is done in log LENGTH stages. In each stage interleave_high
6118
- and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
6119
- where the first argument is taken from the first half of DR_CHAIN and the
6120
- second argument from its second half.
6121
- In our example,
6122
-
6123
- I1: interleave_high (1st vec, 3rd vec)
6124
- I2: interleave_low (1st vec, 3rd vec)
6125
- I3: interleave_high (2nd vec, 4th vec)
6126
- I4: interleave_low (2nd vec, 4th vec)
6127
-
6128
- The output for the first stage is:
6129
-
6130
- I1: 0 16 1 17 2 18 3 19
6131
- I2: 4 20 5 21 6 22 7 23
6132
- I3: 8 24 9 25 10 26 11 27
6133
- I4: 12 28 13 29 14 30 15 31
6134
-
6135
- The output of the second stage, i.e. the final result is:
6136
-
6137
- I1: 0 8 16 24 1 9 17 25
6138
- I2: 2 10 18 26 3 11 19 27
6139
- I3: 4 12 20 28 5 13 21 29
6140
- I4: 6 14 22 30 7 15 23 31. */
6141
-
6142
- void
6143
- vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
6144
- unsigned int length,
6145
- stmt_vec_info stmt_info,
6146
- gimple_stmt_iterator *gsi,
6147
- vec<tree> *result_chain)
6148
- {
6149
- tree vect1, vect2, high, low;
6150
- gimple *perm_stmt;
6151
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6152
- tree perm_mask_low, perm_mask_high;
6153
- tree data_ref;
6154
- tree perm3_mask_low, perm3_mask_high;
6155
- unsigned int i, j, n, log_length = exact_log2 (length);
6156
-
6157
- result_chain->quick_grow (length);
6158
- memcpy (result_chain->address (), dr_chain.address (),
6159
- length * sizeof (tree));
6160
-
6161
- if (length == 3 )
6162
- {
6163
- /* vect_grouped_store_supported ensures that this is constant. */
6164
- unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
6165
- unsigned int j0 = 0 , j1 = 0 , j2 = 0 ;
6166
-
6167
- vec_perm_builder sel (nelt, nelt, 1 );
6168
- sel.quick_grow (nelt);
6169
- vec_perm_indices indices;
6170
- for (j = 0 ; j < 3 ; j++)
6171
- {
6172
- int nelt0 = ((3 - j) * nelt) % 3 ;
6173
- int nelt1 = ((3 - j) * nelt + 1 ) % 3 ;
6174
- int nelt2 = ((3 - j) * nelt + 2 ) % 3 ;
6175
-
6176
- for (i = 0 ; i < nelt; i++)
6177
- {
6178
- if (3 * i + nelt0 < nelt)
6179
- sel[3 * i + nelt0] = j0++;
6180
- if (3 * i + nelt1 < nelt)
6181
- sel[3 * i + nelt1] = nelt + j1++;
6182
- if (3 * i + nelt2 < nelt)
6183
- sel[3 * i + nelt2] = 0 ;
6184
- }
6185
- indices.new_vector (sel, 2 , nelt);
6186
- perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6187
-
6188
- for (i = 0 ; i < nelt; i++)
6189
- {
6190
- if (3 * i + nelt0 < nelt)
6191
- sel[3 * i + nelt0] = 3 * i + nelt0;
6192
- if (3 * i + nelt1 < nelt)
6193
- sel[3 * i + nelt1] = 3 * i + nelt1;
6194
- if (3 * i + nelt2 < nelt)
6195
- sel[3 * i + nelt2] = nelt + j2++;
6196
- }
6197
- indices.new_vector (sel, 2 , nelt);
6198
- perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6199
-
6200
- vect1 = dr_chain[0 ];
6201
- vect2 = dr_chain[1 ];
6202
-
6203
- /* Create interleaving stmt:
6204
- low = VEC_PERM_EXPR <vect1, vect2,
6205
- {j, nelt, *, j + 1, nelt + j + 1, *,
6206
- j + 2, nelt + j + 2, *, ...}> */
6207
- data_ref = make_temp_ssa_name (vectype, NULL , " vect_shuffle3_low" );
6208
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6209
- vect2, perm3_mask_low);
6210
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6211
-
6212
- vect1 = data_ref;
6213
- vect2 = dr_chain[2 ];
6214
- /* Create interleaving stmt:
6215
- low = VEC_PERM_EXPR <vect1, vect2,
6216
- {0, 1, nelt + j, 3, 4, nelt + j + 1,
6217
- 6, 7, nelt + j + 2, ...}> */
6218
- data_ref = make_temp_ssa_name (vectype, NULL , " vect_shuffle3_high" );
6219
- perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6220
- vect2, perm3_mask_high);
6221
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6222
- (*result_chain)[j] = data_ref;
6223
- }
6224
- }
6225
- else
6226
- {
6227
- /* If length is not equal to 3 then only power of 2 is supported. */
6228
- gcc_assert (pow2p_hwi (length));
6229
-
6230
- /* The encoding has 2 interleaved stepped patterns. */
6231
- poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6232
- vec_perm_builder sel (nelt, 2 , 3 );
6233
- sel.quick_grow (6 );
6234
- for (i = 0 ; i < 3 ; i++)
6235
- {
6236
- sel[i * 2 ] = i;
6237
- sel[i * 2 + 1 ] = i + nelt;
6238
- }
6239
- vec_perm_indices indices (sel, 2 , nelt);
6240
- perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6241
-
6242
- for (i = 0 ; i < 6 ; i++)
6243
- sel[i] += exact_div (nelt, 2 );
6244
- indices.new_vector (sel, 2 , nelt);
6245
- perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6246
-
6247
- for (i = 0 , n = log_length; i < n; i++)
6248
- {
6249
- for (j = 0 ; j < length/2 ; j++)
6250
- {
6251
- vect1 = dr_chain[j];
6252
- vect2 = dr_chain[j+length/2 ];
6253
-
6254
- /* Create interleaving stmt:
6255
- high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
6256
- ...}> */
6257
- high = make_temp_ssa_name (vectype, NULL , " vect_inter_high" );
6258
- perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
6259
- vect2, perm_mask_high);
6260
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6261
- (*result_chain)[2 *j] = high;
6262
-
6263
- /* Create interleaving stmt:
6264
- low = VEC_PERM_EXPR <vect1, vect2,
6265
- {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
6266
- ...}> */
6267
- low = make_temp_ssa_name (vectype, NULL , " vect_inter_low" );
6268
- perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
6269
- vect2, perm_mask_low);
6270
- vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6271
- (*result_chain)[2 *j+1 ] = low;
6272
- }
6273
- memcpy (dr_chain.address (), result_chain->address (),
6274
- length * sizeof (tree));
6275
- }
6276
- }
6277
- }
6278
-
6279
6081
/* Function vect_setup_realignment
6280
6082
6281
6083
This function is called when vectorizing an unaligned load using
0 commit comments