Skip to content

Commit fcadd6d

Browse files
committed
Remove store interleaving support
The following removes the non-SLP store interleaving support, which was already almost unused.

* tree-vectorizer.h (vect_permute_store_chain): Remove.
* tree-vect-data-refs.cc (vect_permute_store_chain): Likewise.
* tree-vect-stmts.cc (vectorizable_store): Remove comment about store interleaving.
1 parent 727276a commit fcadd6d

File tree

3 files changed

+0
-234
lines changed

3 files changed

+0
-234
lines changed

gcc/tree-vect-data-refs.cc

Lines changed: 0 additions & 198 deletions
Original file line numberDiff line numberDiff line change
@@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
60786078
}
60796079

60806080

6081-
/* Function vect_permute_store_chain.
6082-
6083-
Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
6084-
a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
6085-
the data correctly for the stores. Return the final references for stores
6086-
in RESULT_CHAIN.
6087-
6088-
E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
6089-
The input is 4 vectors each containing 8 elements. We assign a number to
6090-
each element, the input sequence is:
6091-
6092-
1st vec: 0 1 2 3 4 5 6 7
6093-
2nd vec: 8 9 10 11 12 13 14 15
6094-
3rd vec: 16 17 18 19 20 21 22 23
6095-
4th vec: 24 25 26 27 28 29 30 31
6096-
6097-
The output sequence should be:
6098-
6099-
1st vec: 0 8 16 24 1 9 17 25
6100-
2nd vec: 2 10 18 26 3 11 19 27
6101-
3rd vec: 4 12 20 28 5 13 21 29
6102-
4th vec: 6 14 22 30 7 15 23 31
6103-
6104-
i.e., we interleave the contents of the four vectors in their order.
6105-
6106-
We use interleave_high/low instructions to create such output. The input of
6107-
each interleave_high/low operation is two vectors:
6108-
1st vec 2nd vec
6109-
0 1 2 3 4 5 6 7
6110-
the even elements of the result vector are obtained left-to-right from the
6111-
high/low elements of the first vector. The odd elements of the result are
6112-
obtained left-to-right from the high/low elements of the second vector.
6113-
The output of interleave_high will be: 0 4 1 5
6114-
and of interleave_low: 2 6 3 7
6115-
6116-
6117-
The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
6118-
and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
6119-
where the first argument is taken from the first half of DR_CHAIN and the
6120-
second argument from its second half.
6121-
In our example,
6122-
6123-
I1: interleave_high (1st vec, 3rd vec)
6124-
I2: interleave_low (1st vec, 3rd vec)
6125-
I3: interleave_high (2nd vec, 4th vec)
6126-
I4: interleave_low (2nd vec, 4th vec)
6127-
6128-
The output for the first stage is:
6129-
6130-
I1: 0 16 1 17 2 18 3 19
6131-
I2: 4 20 5 21 6 22 7 23
6132-
I3: 8 24 9 25 10 26 11 27
6133-
I4: 12 28 13 29 14 30 15 31
6134-
6135-
The output of the second stage, i.e. the final result is:
6136-
6137-
I1: 0 8 16 24 1 9 17 25
6138-
I2: 2 10 18 26 3 11 19 27
6139-
I3: 4 12 20 28 5 13 21 29
6140-
I4: 6 14 22 30 7 15 23 31. */
6141-
6142-
void
6143-
vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
6144-
unsigned int length,
6145-
stmt_vec_info stmt_info,
6146-
gimple_stmt_iterator *gsi,
6147-
vec<tree> *result_chain)
6148-
{
6149-
tree vect1, vect2, high, low;
6150-
gimple *perm_stmt;
6151-
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6152-
tree perm_mask_low, perm_mask_high;
6153-
tree data_ref;
6154-
tree perm3_mask_low, perm3_mask_high;
6155-
unsigned int i, j, n, log_length = exact_log2 (length);
6156-
6157-
result_chain->quick_grow (length);
6158-
memcpy (result_chain->address (), dr_chain.address (),
6159-
length * sizeof (tree));
6160-
6161-
if (length == 3)
6162-
{
6163-
/* vect_grouped_store_supported ensures that this is constant. */
6164-
unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
6165-
unsigned int j0 = 0, j1 = 0, j2 = 0;
6166-
6167-
vec_perm_builder sel (nelt, nelt, 1);
6168-
sel.quick_grow (nelt);
6169-
vec_perm_indices indices;
6170-
for (j = 0; j < 3; j++)
6171-
{
6172-
int nelt0 = ((3 - j) * nelt) % 3;
6173-
int nelt1 = ((3 - j) * nelt + 1) % 3;
6174-
int nelt2 = ((3 - j) * nelt + 2) % 3;
6175-
6176-
for (i = 0; i < nelt; i++)
6177-
{
6178-
if (3 * i + nelt0 < nelt)
6179-
sel[3 * i + nelt0] = j0++;
6180-
if (3 * i + nelt1 < nelt)
6181-
sel[3 * i + nelt1] = nelt + j1++;
6182-
if (3 * i + nelt2 < nelt)
6183-
sel[3 * i + nelt2] = 0;
6184-
}
6185-
indices.new_vector (sel, 2, nelt);
6186-
perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6187-
6188-
for (i = 0; i < nelt; i++)
6189-
{
6190-
if (3 * i + nelt0 < nelt)
6191-
sel[3 * i + nelt0] = 3 * i + nelt0;
6192-
if (3 * i + nelt1 < nelt)
6193-
sel[3 * i + nelt1] = 3 * i + nelt1;
6194-
if (3 * i + nelt2 < nelt)
6195-
sel[3 * i + nelt2] = nelt + j2++;
6196-
}
6197-
indices.new_vector (sel, 2, nelt);
6198-
perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6199-
6200-
vect1 = dr_chain[0];
6201-
vect2 = dr_chain[1];
6202-
6203-
/* Create interleaving stmt:
6204-
low = VEC_PERM_EXPR <vect1, vect2,
6205-
{j, nelt + j, *, j + 1, nelt + j + 1, *,
6206-
j + 2, nelt + j + 2, *, ...}> */
6207-
data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
6208-
perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6209-
vect2, perm3_mask_low);
6210-
vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6211-
6212-
vect1 = data_ref;
6213-
vect2 = dr_chain[2];
6214-
/* Create interleaving stmt:
6215-
high = VEC_PERM_EXPR <vect1, vect2,
6216-
{0, 1, nelt + j, 3, 4, nelt + j + 1,
6217-
6, 7, nelt + j + 2, ...}> */
6218-
data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
6219-
perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
6220-
vect2, perm3_mask_high);
6221-
vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6222-
(*result_chain)[j] = data_ref;
6223-
}
6224-
}
6225-
else
6226-
{
6227-
/* If length is not equal to 3 then only power of 2 is supported. */
6228-
gcc_assert (pow2p_hwi (length));
6229-
6230-
/* The encoding has 2 interleaved stepped patterns. */
6231-
poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
6232-
vec_perm_builder sel (nelt, 2, 3);
6233-
sel.quick_grow (6);
6234-
for (i = 0; i < 3; i++)
6235-
{
6236-
sel[i * 2] = i;
6237-
sel[i * 2 + 1] = i + nelt;
6238-
}
6239-
vec_perm_indices indices (sel, 2, nelt);
6240-
perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
6241-
6242-
for (i = 0; i < 6; i++)
6243-
sel[i] += exact_div (nelt, 2);
6244-
indices.new_vector (sel, 2, nelt);
6245-
perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
6246-
6247-
for (i = 0, n = log_length; i < n; i++)
6248-
{
6249-
for (j = 0; j < length/2; j++)
6250-
{
6251-
vect1 = dr_chain[j];
6252-
vect2 = dr_chain[j+length/2];
6253-
6254-
/* Create interleaving stmt:
6255-
high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
6256-
...}> */
6257-
high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
6258-
perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
6259-
vect2, perm_mask_high);
6260-
vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6261-
(*result_chain)[2*j] = high;
6262-
6263-
/* Create interleaving stmt:
6264-
low = VEC_PERM_EXPR <vect1, vect2,
6265-
{nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
6266-
...}> */
6267-
low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
6268-
perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
6269-
vect2, perm_mask_low);
6270-
vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
6271-
(*result_chain)[2*j+1] = low;
6272-
}
6273-
memcpy (dr_chain.address (), result_chain->address (),
6274-
length * sizeof (tree));
6275-
}
6276-
}
6277-
}
6278-
62796081
/* Function vect_setup_realignment
62806082
62816083
This function is called when vectorizing an unaligned load using

gcc/tree-vect-stmts.cc

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8413,39 +8413,6 @@ vectorizable_store (vec_info *vinfo,
84138413
more than one vector stmt - i.e - we need to "unroll" the
84148414
vector stmt by a factor VF/nunits. */
84158415

8416-
/* In case of interleaving (non-unit grouped access):
8417-
8418-
S1: &base + 2 = x2
8419-
S2: &base = x0
8420-
S3: &base + 1 = x1
8421-
S4: &base + 3 = x3
8422-
8423-
We create vectorized stores starting from base address (the access of the
8424-
first stmt in the chain (S2 in the above example)), when the last store stmt
8425-
of the chain (S4) is reached:
8426-
8427-
VS1: &base = vx2
8428-
VS2: &base + vec_size*1 = vx0
8429-
VS3: &base + vec_size*2 = vx1
8430-
VS4: &base + vec_size*3 = vx3
8431-
8432-
Then permutation statements are generated:
8433-
8434-
VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8435-
VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8436-
...
8437-
8438-
And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8439-
(the order of the data-refs in the output of vect_permute_store_chain
8440-
corresponds to the order of scalar stmts in the interleaving chain - see
8441-
the documentation of vect_permute_store_chain()).
8442-
8443-
In case of both multiple types and interleaving, above vector stores and
8444-
permutation stmts are created for every copy. The result vector stmts are
8445-
put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8446-
STMT_VINFO_RELATED_STMT for the next copies.
8447-
*/
8448-
84498416
auto_vec<tree> dr_chain (group_size);
84508417
auto_vec<tree> vec_masks;
84518418
tree vec_mask = NULL;

gcc/tree-vectorizer.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2568,9 +2568,6 @@ extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, boo
25682568
extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
25692569
extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
25702570
bool, vec<int> * = nullptr);
2571-
extern void vect_permute_store_chain (vec_info *, vec<tree> &,
2572-
unsigned int, stmt_vec_info,
2573-
gimple_stmt_iterator *, vec<tree> *);
25742571
extern tree vect_setup_realignment (vec_info *,
25752572
stmt_vec_info, gimple_stmt_iterator *,
25762573
tree *, enum dr_alignment_support, tree,

0 commit comments

Comments
 (0)