
Commit 525ccd4

Merge branch 'main' into amdgpu-loopalign

2 parents 676038c + 77cb098

File tree: 5 files changed, +322 −29 lines changed

- flang/include/flang/Evaluate/rewrite.h
- libcxx/utils/ci/docker-compose.yml
- lldb/scripts/framework-header-fix.sh
- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
- llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll
flang/include/flang/Evaluate/rewrite.h

Lines changed: 160 additions & 0 deletions

@@ -0,0 +1,160 @@
+//===-- include/flang/Evaluate/rewrite.h ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef FORTRAN_EVALUATE_REWRITE_H_
+#define FORTRAN_EVALUATE_REWRITE_H_
+
+#include "flang/Common/visit.h"
+#include "flang/Evaluate/expression.h"
+#include "flang/Support/Fortran.h"
+#include "llvm/ADT/STLExtras.h"
+
+#include <tuple>
+#include <type_traits>
+#include <utility>
+#include <variant>
+
+namespace Fortran::evaluate {
+namespace rewrite {
+namespace detail {
+template <typename, typename = void> //
+struct IsOperation {
+  static constexpr bool value{false};
+};
+
+template <typename T>
+struct IsOperation<T, std::void_t<decltype(T::operands)>> {
+  static constexpr bool value{true};
+};
+} // namespace detail
+
+template <typename T>
+constexpr bool is_operation_v{detail::IsOperation<T>::value};
+
+/// Individual Expr<T> rewriter that simply constructs an expression that is
+/// identical to the input. This is a suitable base class for all user-defined
+/// rewriters.
+struct Identity {
+  template <typename T, typename U>
+  Expr<T> operator()(Expr<T> &&x, const U &op) {
+    return std::move(x);
+  }
+};
+
+/// Bottom-up Expr<T> rewriter.
+///
+/// The Mutator traverses and reconstructs given Expr<T>. Going bottom-up,
+/// whenever the traversal visits a sub-node of type Expr<U> (for some U),
+/// it will invoke the user-provided rewriter via the () operator.
+///
+/// If x is of type Expr<U>, it will call (in pseudo-code):
+///   rewriter_(x, active_member_of(x.u))
+/// The second parameter is there to make it easier to overload the () operator
+/// for specific operations in Expr<...>.
+///
+/// The user rewriter is only invoked for Expr<U>, not for Operation, nor any
+/// other subobject.
+template <typename Rewriter> struct Mutator {
+  Mutator(Rewriter &rewriter) : rewriter_(rewriter) {}
+
+  template <typename T, typename U = llvm::remove_cvref_t<T>>
+  U operator()(T &&x) {
+    if constexpr (std::is_lvalue_reference_v<T>) {
+      return Mutate(U(x));
+    } else {
+      return Mutate(std::move(x));
+    }
+  }
+
+private:
+  template <typename T> struct LambdaWithRvalueCapture {
+    LambdaWithRvalueCapture(Rewriter &r, Expr<T> &&c)
+        : rewriter_(r), capture_(std::move(c)) {}
+    template <typename S> Expr<T> operator()(const S &s) {
+      return rewriter_(std::move(capture_), s);
+    }
+
+  private:
+    Rewriter &rewriter_;
+    Expr<T> &&capture_;
+  };
+
+  template <typename T, typename = std::enable_if_t<!is_operation_v<T>>>
+  T Mutate(T &&x) const {
+    return std::move(x);
+  }
+
+  template <typename D, typename = std::enable_if_t<is_operation_v<D>>>
+  D Mutate(D &&op, std::make_index_sequence<D::operands> t = {}) const {
+    return MutateOp(std::move(op), t);
+  }
+
+  template <typename T> //
+  Expr<T> Mutate(Expr<T> &&x) const {
+    // First construct the new expression with the rewritten op.
+    Expr<T> n{common::visit(
+        [&](auto &&s) { //
+          return Expr<T>(Mutate(std::move(s)));
+        },
+        std::move(x.u))};
+    // Return the rewritten expression. The second visit is to make sure
+    // that the second argument in the call to the rewriter is a part of
+    // the Expr<T> passed to it.
+    return common::visit(
+        LambdaWithRvalueCapture<T>(rewriter_, std::move(n)), std::move(n.u));
+  }
+
+  template <typename... Ts>
+  std::variant<Ts...> Mutate(std::variant<Ts...> &&u) const {
+    return common::visit(
+        [this](auto &&s) { return Mutate(std::move(s)); }, std::move(u));
+  }
+
+  template <typename... Ts>
+  std::tuple<Ts...> Mutate(std::tuple<Ts...> &&t) const {
+    return MutateTuple(std::move(t), std::index_sequence_for<Ts...>{});
+  }
+
+  template <typename... Ts, size_t... Is>
+  std::tuple<Ts...> MutateTuple(
+      std::tuple<Ts...> &&t, std::index_sequence<Is...>) const {
+    return std::make_tuple(Mutate(std::move(std::get<Is>(t)))...);
+  }
+
+  template <typename D, size_t... Is>
+  D MutateOp(D &&op, std::index_sequence<Is...>) const {
+    return D(Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <typename T, size_t... Is>
+  Extremum<T> MutateOp(Extremum<T> &&op, std::index_sequence<Is...>) const {
+    return Extremum<T>(
+        op.ordering, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <int K, size_t... Is>
+  ComplexComponent<K> MutateOp(
+      ComplexComponent<K> &&op, std::index_sequence<Is...>) const {
+    return ComplexComponent<K>(
+        op.isImaginaryPart, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  template <int K, size_t... Is>
+  LogicalOperation<K> MutateOp(
+      LogicalOperation<K> &&op, std::index_sequence<Is...>) const {
+    return LogicalOperation<K>(
+        op.logicalOperator, Mutate(std::move(op.template operand<Is>()))...);
+  }
+
+  Rewriter &rewriter_;
+};
+
+template <typename Rewriter> Mutator(Rewriter &) -> Mutator<Rewriter>;
+} // namespace rewrite
+} // namespace Fortran::evaluate
+
+#endif // FORTRAN_EVALUATE_REWRITE_H_
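To make the intended usage concrete, here is a minimal sketch of a user-defined rewriter driven by the Mutator above. The rewriter type and its double-negation folding rule are hypothetical illustrations, not part of this commit; the sketch assumes the Expr<T> and Negate<T> templates declared in flang/Evaluate/expression.h.

// Hypothetical usage sketch (not from the commit): a rewriter that folds
// Negate(Negate(x)) to x and leaves every other expression unchanged.
#include "flang/Evaluate/rewrite.h"

namespace Fortran::evaluate {

struct FoldDoubleNegate : rewrite::Identity {
  using Identity::operator(); // keep the identity behavior for other nodes

  // Invoked by Mutator whenever the active member of x.u is a Negate<T>.
  template <typename T>
  Expr<T> operator()(Expr<T> &&x, const Negate<T> &neg) {
    // If the operand of the outer negation is itself a negation, return
    // the inner operand instead of the whole expression.
    if (const auto *inner{std::get_if<Negate<T>>(&neg.left().u)}) {
      return Expr<T>{inner->left()};
    }
    return std::move(x);
  }
};

// Bottom-up application over some expression `expr` of type Expr<T>:
//   FoldDoubleNegate folder;
//   auto rewritten{rewrite::Mutator{folder}(std::move(expr))};

} // namespace Fortran::evaluate

Because Mutator rebuilds the tree bottom-up, the rewriter sees already-rewritten operands, so nested double negations collapse in a single pass.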

libcxx/utils/ci/docker-compose.yml

Lines changed: 7 additions & 6 deletions
@@ -1,6 +1,10 @@
 x-versions: &compiler_versions
   GCC_HEAD_VERSION: 16
-  LLVM_HEAD_VERSION: 21
+  LLVM_HEAD_VERSION: 22
+
+x-image-versions: &image_versions
+  BASE_IMAGE: ubuntu:jammy
+  ACTIONS_BASE_IMAGE: builder-base
 
 services:
   builder-base:
@@ -10,8 +14,7 @@ services:
       dockerfile: Dockerfile
       target: builder-base
       args:
-        BASE_IMAGE: ubuntu:jammy
-        <<: *compiler_versions
+        <<: [*image_versions, *compiler_versions]
 
   actions-builder:
     image: ghcr.io/llvm/libcxx-linux-builder:${TAG}
@@ -20,10 +23,8 @@ services:
       dockerfile: Dockerfile
      target: actions-builder
       args:
-        BASE_IMAGE: ubuntu:jammy
-        ACTIONS_BASE_IMAGE: builder-base
         GITHUB_RUNNER_VERSION: "2.326.0"
-        <<: *compiler_versions
+        <<: [*image_versions, *compiler_versions]
 
   android-buildkite-builder:
     image: ghcr.io/llvm/libcxx-android-builder:${TAG}

lldb/scripts/framework-header-fix.sh

Lines changed: 0 additions & 11 deletions
This file was deleted.

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 12 additions & 12 deletions
@@ -92,18 +92,6 @@ static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
   return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
 }
 
-/// This is wrapper over IRBuilderBase::restoreIP that also restores the current
-/// debug location to the last instruction in the specified basic block if the
-/// insert point points to the end of the block.
-static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder,
-                                 llvm::IRBuilderBase::InsertPoint IP) {
-  Builder.restoreIP(IP);
-  llvm::BasicBlock *BB = Builder.GetInsertBlock();
-  llvm::BasicBlock::iterator I = Builder.GetInsertPoint();
-  if (!BB->empty() && I == BB->end())
-    Builder.SetCurrentDebugLocation(BB->back().getStableDebugLoc());
-}
-
 static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
   // Valid ordered/unordered and base algorithm combinations.
   switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
@@ -163,6 +151,18 @@ static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
 }
 #endif
 
+/// This is wrapper over IRBuilderBase::restoreIP that also restores the current
+/// debug location to the last instruction in the specified basic block if the
+/// insert point points to the end of the block.
+static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder,
+                                 llvm::IRBuilderBase::InsertPoint IP) {
+  Builder.restoreIP(IP);
+  llvm::BasicBlock *BB = Builder.GetInsertBlock();
+  llvm::BasicBlock::iterator I = Builder.GetInsertPoint();
+  if (!BB->empty() && I == BB->end())
+    Builder.SetCurrentDebugLocation(BB->back().getStableDebugLoc());
+}
+
 static const omp::GV &getGridValue(const Triple &T, Function *Kernel) {
   if (T.isAMDGPU()) {
     StringRef Features =
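The helper is only moved by this commit, not changed. For context on what it does: per its own doc comment, IRBuilderBase::restoreIP repositions the builder but does not touch the current debug location, so resuming at an end-of-block insert point can leave a stale location behind. A hedged sketch of a hypothetical caller, assuming only the IRBuilderBase API used above:

// Hypothetical illustration (not from the commit): emit into another block,
// then come back with both the insert point and the debug location restored.
static void emitElsewhereThenResume(llvm::IRBuilderBase &Builder,
                                    llvm::BasicBlock *Other) {
  llvm::IRBuilderBase::InsertPoint Saved = Builder.saveIP();
  Builder.SetInsertPoint(Other);
  // ... emit instructions into Other, which may change the debug location ...
  restoreIPandDebugLoc(Builder, Saved);
  // A plain Builder.restoreIP(Saved) would keep Other's debug location even
  // though insertion now continues at the end of the saved block.
}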

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 143 additions & 0 deletions
@@ -3398,3 +3398,146 @@ define <vscale x 4 x double> @vfrec7(<vscale x 4 x float> %a) {
   %2 = call <vscale x 4 x double> @llvm.riscv.vfwmacc(<vscale x 4 x double> poison, <vscale x 4 x float> %a, <vscale x 4 x float> %1, iXLen 7, iXLen 6, iXLen 0)
   ret <vscale x 4 x double> %2
 }
+
+define <vscale x 4 x i32> @vandn_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; CHECK-LABEL: vandn_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vandn.vv v10, v8, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vandn.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vandn_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; CHECK-LABEL: vandn_vx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vandn.vx v10, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vandn.nxv4i32.i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, i32 %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vbrev_v(<vscale x 4 x i32> %a, iXLen %vl) {
+; CHECK-LABEL: vbrev_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vbrev.v v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vbrev.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vclz_v(<vscale x 4 x i32> %a, iXLen %vl) {
+; CHECK-LABEL: vclz_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vclz.v v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vclz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vcpop_v(<vscale x 4 x i32> %a, iXLen %vl) {
+; CHECK-LABEL: vcpop_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vcpop.v v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vcpopv.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vctz_v(<vscale x 4 x i32> %a, iXLen %vl) {
+; CHECK-LABEL: vctz_v:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vctz.v v10, v8
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vctz.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vror_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; CHECK-LABEL: vror_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vror.vv v10, v8, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vror.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vror_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; CHECK-LABEL: vror_vx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vror.vx v10, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vror.nxv4i32.iXLen(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vror_vi(<vscale x 4 x i32> %a, iXLen %vl) {
+; CHECK-LABEL: vror_vi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vror.vi v10, v8, 5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vror.nxv4i32.iXLen(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 5, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vrol_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; CHECK-LABEL: vrol_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vrol.vv v10, v8, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrol.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vrol_vx(<vscale x 4 x i32> %a, iXLen %b, iXLen %vl) {
+; CHECK-LABEL: vrol_vx:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vrol.vx v10, v8, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v10, v8
+; CHECK-NEXT:    ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vrol.nxv4i32.iXLen(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen %b, iXLen -1)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}