Remove store interleaving support

rguenth · rguenth · commit fcadd6d32398 · 2025-07-25T12:44:55.000+02:00
The following removes the non-SLP store interleaving support, which
was already almost unused.

	* tree-vectorizer.h (vect_permute_store_chain): Remove.
	* tree-vect-data-refs.cc (vect_permute_store_chain): Likewise.
	* tree-vect-stmts.cc (vectorizable_store): Remove comment
	about store interleaving.
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
@@ -6078,204 +6078,6 @@ vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
 }
 
 
-/* Function vect_permute_store_chain.
-
-   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
-   a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
-   the data correctly for the stores.  Return the final references for stores
-   in RESULT_CHAIN.
-
-   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
-   The input is 4 vectors each containing 8 elements.  We assign a number to
-   each element, the input sequence is:
-
-   1st vec:   0  1  2  3  4  5  6  7
-   2nd vec:   8  9 10 11 12 13 14 15
-   3rd vec:  16 17 18 19 20 21 22 23
-   4th vec:  24 25 26 27 28 29 30 31
-
-   The output sequence should be:
-
-   1st vec:  0  8 16 24  1  9 17 25
-   2nd vec:  2 10 18 26  3 11 19 27
-   3rd vec:  4 12 20 28  5 13 21 30
-   4th vec:  6 14 22 30  7 15 23 31
-
-   i.e., we interleave the contents of the four vectors in their order.
-
-   We use interleave_high/low instructions to create such output.  The input of
-   each interleave_high/low operation is two vectors:
-   1st vec    2nd vec
-   0 1 2 3    4 5 6 7
-   the even elements of the result vector are obtained left-to-right from the
-   high/low elements of the first vector.  The odd elements of the result are
-   obtained left-to-right from the high/low elements of the second vector.
-   The output of interleave_high will be:   0 4 1 5
-   and of interleave_low:                   2 6 3 7
-
-
-   The permutation is done in log LENGTH stages.  In each stage interleave_high
-   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
-   where the first argument is taken from the first half of DR_CHAIN and the
-   second argument from it's second half.
-   In our example,
-
-   I1: interleave_high (1st vec, 3rd vec)
-   I2: interleave_low (1st vec, 3rd vec)
-   I3: interleave_high (2nd vec, 4th vec)
-   I4: interleave_low (2nd vec, 4th vec)
-
-   The output for the first stage is:
-
-   I1:  0 16  1 17  2 18  3 19
-   I2:  4 20  5 21  6 22  7 23
-   I3:  8 24  9 25 10 26 11 27
-   I4: 12 28 13 29 14 30 15 31
-
-   The output of the second stage, i.e. the final result is:
-
-   I1:  0  8 16 24  1  9 17 25
-   I2:  2 10 18 26  3 11 19 27
-   I3:  4 12 20 28  5 13 21 30
-   I4:  6 14 22 30  7 15 23 31.  */
-
-void
-vect_permute_store_chain (vec_info *vinfo, vec<tree> &dr_chain,
-			  unsigned int length,
-			  stmt_vec_info stmt_info,
-			  gimple_stmt_iterator *gsi,
-			  vec<tree> *result_chain)
-{
-  tree vect1, vect2, high, low;
-  gimple *perm_stmt;
-  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree perm_mask_low, perm_mask_high;
-  tree data_ref;
-  tree perm3_mask_low, perm3_mask_high;
-  unsigned int i, j, n, log_length = exact_log2 (length);
-
-  result_chain->quick_grow (length);
-  memcpy (result_chain->address (), dr_chain.address (),
-	  length * sizeof (tree));
-
-  if (length == 3)
-    {
-      /* vect_grouped_store_supported ensures that this is constant.  */
-      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
-      unsigned int j0 = 0, j1 = 0, j2 = 0;
-
-      vec_perm_builder sel (nelt, nelt, 1);
-      sel.quick_grow (nelt);
-      vec_perm_indices indices;
-      for (j = 0; j < 3; j++)
-        {
-	  int nelt0 = ((3 - j) * nelt) % 3;
-	  int nelt1 = ((3 - j) * nelt + 1) % 3;
-	  int nelt2 = ((3 - j) * nelt + 2) % 3;
-
-	  for (i = 0; i < nelt; i++)
-	    {
-	      if (3 * i + nelt0 < nelt)
-		sel[3 * i + nelt0] = j0++;
-	      if (3 * i + nelt1 < nelt)
-		sel[3 * i + nelt1] = nelt + j1++;
-	      if (3 * i + nelt2 < nelt)
-		sel[3 * i + nelt2] = 0;
-	    }
-	  indices.new_vector (sel, 2, nelt);
-	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-	  for (i = 0; i < nelt; i++)
-	    {
-	      if (3 * i + nelt0 < nelt)
-		sel[3 * i + nelt0] = 3 * i + nelt0;
-	      if (3 * i + nelt1 < nelt)
-		sel[3 * i + nelt1] = 3 * i + nelt1;
-	      if (3 * i + nelt2 < nelt)
-		sel[3 * i + nelt2] = nelt + j2++;
-	    }
-	  indices.new_vector (sel, 2, nelt);
-	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-	  vect1 = dr_chain[0];
-	  vect2 = dr_chain[1];
-
-	  /* Create interleaving stmt:
-	     low = VEC_PERM_EXPR <vect1, vect2,
-				  {j, nelt, *, j + 1, nelt + j + 1, *,
-				   j + 2, nelt + j + 2, *, ...}>  */
-	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
-	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-					   vect2, perm3_mask_low);
-	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-
-	  vect1 = data_ref;
-	  vect2 = dr_chain[2];
-	  /* Create interleaving stmt:
-	     low = VEC_PERM_EXPR <vect1, vect2,
-				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
-				   6, 7, nelt + j + 2, ...}>  */
-	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
-	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
-					   vect2, perm3_mask_high);
-	  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-	  (*result_chain)[j] = data_ref;
-	}
-    }
-  else
-    {
-      /* If length is not equal to 3 then only power of 2 is supported.  */
-      gcc_assert (pow2p_hwi (length));
-
-      /* The encoding has 2 interleaved stepped patterns.  */
-      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
-      vec_perm_builder sel (nelt, 2, 3);
-      sel.quick_grow (6);
-      for (i = 0; i < 3; i++)
-	{
-	  sel[i * 2] = i;
-	  sel[i * 2 + 1] = i + nelt;
-	}
-	vec_perm_indices indices (sel, 2, nelt);
-	perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
-
-	for (i = 0; i < 6; i++)
-	  sel[i] += exact_div (nelt, 2);
-	indices.new_vector (sel, 2, nelt);
-	perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
-
-	for (i = 0, n = log_length; i < n; i++)
-	  {
-	    for (j = 0; j < length/2; j++)
-	      {
-		vect1 = dr_chain[j];
-		vect2 = dr_chain[j+length/2];
-
-		/* Create interleaving stmt:
-		   high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
-							...}>  */
-		high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
-		perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
-						 vect2, perm_mask_high);
-		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-		(*result_chain)[2*j] = high;
-
-		/* Create interleaving stmt:
-		   low = VEC_PERM_EXPR <vect1, vect2,
-					{nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
-					 ...}>  */
-		low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
-		perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
-						 vect2, perm_mask_low);
-		vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
-		(*result_chain)[2*j+1] = low;
-	      }
-	    memcpy (dr_chain.address (), result_chain->address (),
-		    length * sizeof (tree));
-	  }
-    }
-}
-
 /* Function vect_setup_realignment
 
    This function is called when vectorizing an unaligned load using
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
@@ -8413,39 +8413,6 @@ vectorizable_store (vec_info *vinfo,
      more than one vector stmt - i.e - we need to "unroll" the
      vector stmt by a factor VF/nunits.  */
 
-  /* In case of interleaving (non-unit grouped access):
-
-        S1:  &base + 2 = x2
-        S2:  &base = x0
-        S3:  &base + 1 = x1
-        S4:  &base + 3 = x3
-
-     We create vectorized stores starting from base address (the access of the
-     first stmt in the chain (S2 in the above example), when the last store stmt
-     of the chain (S4) is reached:
-
-        VS1: &base = vx2
-	VS2: &base + vec_size*1 = vx0
-	VS3: &base + vec_size*2 = vx1
-	VS4: &base + vec_size*3 = vx3
-
-     Then permutation statements are generated:
-
-	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
-	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
-	...
-
-     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
-     (the order of the data-refs in the output of vect_permute_store_chain
-     corresponds to the order of scalar stmts in the interleaving chain - see
-     the documentation of vect_permute_store_chain()).
-
-     In case of both multiple types and interleaving, above vector stores and
-     permutation stmts are created for every copy.  The result vector stmts are
-     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
-     STMT_VINFO_RELATED_STMT for the next copies.
-  */
-
   auto_vec<tree> dr_chain (group_size);
   auto_vec<tree> vec_masks;
   tree vec_mask = NULL;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
@@ -2568,9 +2568,6 @@ extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, boo
 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
 extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT,
 					      bool, vec<int> * = nullptr);
-extern void vect_permute_store_chain (vec_info *, vec<tree> &,
-				      unsigned int, stmt_vec_info,
-				      gimple_stmt_iterator *, vec<tree> *);
 extern tree vect_setup_realignment (vec_info *,
 				    stmt_vec_info, gimple_stmt_iterator *,
 				    tree *, enum dr_alignment_support, tree,