ROCm
diff --git a/‎clang/lib/Driver/OffloadBundler.cpp‎
Lines changed: 5 additions & 5 deletions b/‎clang/lib/Driver/OffloadBundler.cpp‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎clang/lib/Format/DefinitionBlockSeparator.cpp‎
Lines changed: 4 additions & 0 deletions b/‎clang/lib/Format/DefinitionBlockSeparator.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎clang/test/Driver/clang-offload-bundler-standardize.c‎
Lines changed: 4 additions & 3 deletions b/‎clang/test/Driver/clang-offload-bundler-standardize.c‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎clang/unittests/Format/DefinitionBlockSeparatorTest.cpp‎
Lines changed: 5 additions & 0 deletions b/‎clang/unittests/Format/DefinitionBlockSeparatorTest.cpp‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎flang/lib/Optimizer/Transforms/AffinePromotion.cpp‎
Lines changed: 34 additions & 5 deletions b/‎flang/lib/Optimizer/Transforms/AffinePromotion.cpp‎
Lines changed: 34 additions & 5 deletions
diff --git a/‎flang/test/Fir/affine-promotion.fir‎
Lines changed: 86 additions & 0 deletions b/‎flang/test/Fir/affine-promotion.fir‎
Lines changed: 86 additions & 0 deletions
diff --git a/‎libc/config/darwin/arm/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions b/‎libc/config/darwin/arm/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎libc/config/linux/aarch64/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions b/‎libc/config/linux/aarch64/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎libc/config/linux/arm/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions b/‎libc/config/linux/arm/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎libc/config/linux/riscv/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions b/‎libc/config/linux/riscv/entrypoints.txt‎
Lines changed: 1 addition & 0 deletions
@@ -145,11 +145,11 @@ bool OffloadTargetInfo::operator==(const OffloadTargetInfo &Target) const {
 
 std::string OffloadTargetInfo::str() const {
   std::string NormalizedTriple;
-  // Unfortunately we need some special sauce for AMDGPU because all the runtime
-  // assumes the triple to be "amdgcn-amd-amdhsa-" (empty environment) instead
-  // of "amdgcn-amd-amdhsa-unknown". It's gonna be very tricky to patch
-  // different layers of runtime.
-  if (Triple.isAMDGPU()) {
+  // Unfortunately we need some special sauce for AMDHSA because all the runtime
+  // assumes the triple to be "amdgcn/spirv64-amd-amdhsa-" (empty environment)
+  // instead of "amdgcn/spirv64-amd-amdhsa-unknown". It's gonna be very tricky
+  // to patch different layers of runtime.
+  if (Triple.getOS() == Triple::OSType::AMDHSA) {
     NormalizedTriple = Triple.normalize(Triple::CanonicalForm::THREE_IDENT);
     NormalizedTriple.push_back('-');
   } else {
 
@@ -137,6 +137,10 @@ void DefinitionBlockSeparator::separateBlocks(
     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
       assert(Direction >= -1);
       assert(Direction <= 1);
+
+      if (Lines[OpeningLineIndex]->First->is(TT_CSharpGenericTypeConstraint))
+        return true;
+
       const size_t OperateIndex = OpeningLineIndex + Direction;
       assert(OperateIndex < Lines.size());
       const auto &OperateLine = Lines[OperateIndex];
 
@@ -11,16 +11,17 @@
 //
 // RUN: echo 'Content of device file 1' > %t.tgt1
 // RUN: echo 'Content of device file 2' > %t.tgt2
-
+// RUN: echo 'Content of device file 3' > %t.tgt3
 //
 // Check code object compatibility for archive unbundling
 //
 // Create an object bundle
-// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908 -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle
+// RUN: clang-offload-bundler -type=o -targets=host-%itanium_abi_triple,hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908,hip-spirv64-amd-amdhsa--amdgcnspirv -input=%t.o -input=%t.tgt1 -input=%t.tgt2 -input=%t.tgt3 -output=%t.bundle
 
-// RUN: clang-offload-bundler -unbundle -type=o -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908 -input=%t.bundle -output=%t-hip-amdgcn-amd-amdhsa--gfx906.bc -output=%t-hip-amdgcn-amd-amdhsa--gfx908.bc -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLE
+// RUN: clang-offload-bundler -unbundle -type=o -targets=hip-amdgcn-amd-amdhsa--gfx906,hip-amdgcn-amd-amdhsa--gfx908,hip-spirv64-amd-amdhsa--amdgcnspirv -input=%t.bundle -output=%t-hip-amdgcn-amd-amdhsa--gfx906.bc -output=%t-hip-amdgcn-amd-amdhsa--gfx908.bc -output=%t-hip-spirv64-amd-amdhsa--amdgcnspirv.bc -debug-only=CodeObjectCompatibility 2>&1 | FileCheck %s -check-prefix=BUNDLE
 // BUNDLE: Compatible: Exact match: [CodeObject: hip-amdgcn-amd-amdhsa--gfx906] : [Target: hip-amdgcn-amd-amdhsa--gfx906]
 // BUNDLE: Compatible: Exact match: [CodeObject: hip-amdgcn-amd-amdhsa--gfx908] : [Target: hip-amdgcn-amd-amdhsa--gfx908]
+// BUNDLE: Compatible: Exact match: [CodeObject: hip-spirv64-amd-amdhsa--amdgcnspirv] : [Target: hip-spirv64-amd-amdhsa--amdgcnspirv]
 
 // Some code so that we can create a binary out of this file.
 int A = 0;
 
@@ -574,6 +574,11 @@ TEST_F(DefinitionBlockSeparatorTest, CSharp) {
                "\r\n"
                "public class FoobarClass {\r\n"
                "  int foobar;\r\n"
+               "}\r\n"
+               "\r\n"
+               "public class LogFactory<TLogger>\r\n"
+               "    where TLogger : class, new() {\r\n"
+               "  int i;\r\n"
                "}",
                Style);
 }
 
@@ -49,8 +49,9 @@ struct AffineIfAnalysis;
 /// second when doing rewrite.
 struct AffineFunctionAnalysis {
   explicit AffineFunctionAnalysis(mlir::func::FuncOp funcOp) {
-    for (fir::DoLoopOp op : funcOp.getOps<fir::DoLoopOp>())
-      loopAnalysisMap.try_emplace(op, op, *this);
+    funcOp->walk([&](fir::DoLoopOp doloop) {
+      loopAnalysisMap.try_emplace(doloop, doloop, *this);
+    });
   }
 
   AffineLoopAnalysis getChildLoopAnalysis(fir::DoLoopOp op) const;
@@ -102,10 +103,23 @@ struct AffineLoopAnalysis {
     return true;
   }
 
+  bool analysisResults(fir::DoLoopOp loopOperation) {
+    if (loopOperation.getFinalValue() &&
+        !loopOperation.getResult(0).use_empty()) {
+      LLVM_DEBUG(
+          llvm::dbgs()
+              << "AffineLoopAnalysis: cannot promote loop final value\n";);
+      return false;
+    }
+
+    return true;
+  }
+
   bool analyzeLoop(fir::DoLoopOp loopOperation,
                    AffineFunctionAnalysis &functionAnalysis) {
     LLVM_DEBUG(llvm::dbgs() << "AffineLoopAnalysis: \n"; loopOperation.dump(););
     return analyzeMemoryAccess(loopOperation) &&
+           analysisResults(loopOperation) &&
            analyzeBody(loopOperation, functionAnalysis);
   }
 
@@ -461,14 +475,28 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
     LLVM_ATTRIBUTE_UNUSED auto loopAnalysis =
         functionAnalysis.getChildLoopAnalysis(loop);
     auto &loopOps = loop.getBody()->getOperations();
+    auto resultOp = cast<fir::ResultOp>(loop.getBody()->getTerminator());
+    auto results = resultOp.getOperands();
+    auto loopResults = loop->getResults();
     auto loopAndIndex = createAffineFor(loop, rewriter);
     auto affineFor = loopAndIndex.first;
     auto inductionVar = loopAndIndex.second;
 
+    if (loop.getFinalValue()) {
+      results = results.drop_front();
+      loopResults = loopResults.drop_front();
+    }
+
     rewriter.startOpModification(affineFor.getOperation());
     affineFor.getBody()->getOperations().splice(
         std::prev(affineFor.getBody()->end()), loopOps, loopOps.begin(),
         std::prev(loopOps.end()));
+    rewriter.replaceAllUsesWith(loop.getRegionIterArgs(),
+                                affineFor.getRegionIterArgs());
+    if (!results.empty()) {
+      rewriter.setInsertionPointToEnd(affineFor.getBody());
+      rewriter.create<affine::AffineYieldOp>(resultOp->getLoc(), results);
+    }
     rewriter.finalizeOpModification(affineFor.getOperation());
 
     rewriter.startOpModification(loop.getOperation());
@@ -479,7 +507,8 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
 
     LLVM_DEBUG(llvm::dbgs() << "AffineLoopConversion: loop rewriten to:\n";
                affineFor.dump(););
-    rewriter.replaceOp(loop, affineFor.getOperation()->getResults());
+    rewriter.replaceAllUsesWith(loopResults, affineFor->getResults());
+    rewriter.eraseOp(loop);
     return success();
   }
 
@@ -503,7 +532,7 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
         ValueRange(op.getUpperBound()),
         mlir::AffineMap::get(0, 1,
                              1 + mlir::getAffineSymbolExpr(0, op.getContext())),
-        step);
+        step, op.getIterOperands());
     return std::make_pair(affineFor, affineFor.getInductionVar());
   }
 
@@ -528,7 +557,7 @@ class AffineLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
         genericUpperBound.getResult(),
         mlir::AffineMap::get(0, 1,
                              1 + mlir::getAffineSymbolExpr(0, op.getContext())),
-        1);
+        1, op.getIterOperands());
     rewriter.setInsertionPointToStart(affineFor.getBody());
     auto actualIndex = rewriter.create<affine::AffineApplyOp>(
         op.getLoc(), actualIndexMap,
 
@@ -131,3 +131,89 @@ func.func @loop_with_if(%a: !arr_d1, %v: f32) {
 // CHECK:   }
 // CHECK:   return
 // CHECK: }
+
+func.func @loop_with_result(%arg0: !fir.ref<!fir.array<100xf32>>, %arg1: !fir.ref<!fir.array<100x100xf32>>, %arg2: !fir.ref<!fir.array<100xf32>>) -> f32 {
+  %c1 = arith.constant 1 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %c100 = arith.constant 100 : index
+  %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %1 = fir.shape %c100, %c100 : (index, index) -> !fir.shape<2>
+  %2 = fir.alloca i32
+  %3:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %cst) -> (index, f32) {
+    %8 = fir.array_coor %arg0(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.addf %arg4, %9 fastmath<contract> : f32
+    %11 = arith.addi %arg3, %c1 overflow<nsw> : index
+    fir.result %11, %10 : index, f32
+  }
+  %4:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %3#1) -> (index, f32) {
+    %8 = fir.array_coor %arg1(%1) %c1, %arg3 : (!fir.ref<!fir.array<100x100xf32>>, !fir.shape<2>, index, index) -> !fir.ref<f32>
+    %9 = fir.convert %8 : (!fir.ref<f32>) -> !fir.ref<!fir.array<100xf32>>
+    %10 = fir.do_loop %arg5 = %c1 to %c100 step %c1 iter_args(%arg6 = %arg4) -> (f32) {
+      %12 = fir.array_coor %9(%0) %arg5 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+      %13 = fir.load %12 : !fir.ref<f32>
+      %14 = arith.addf %arg6, %13 fastmath<contract> : f32
+      fir.result %14 : f32
+    }
+    %11 = arith.addi %arg3, %c1 overflow<nsw> : index
+    fir.result %11, %10 : index, f32
+  }
+  %5:2 = fir.do_loop %arg3 = %c1 to %c100 step %c1 iter_args(%arg4 = %4#1, %arg5 = %cst) -> (f32, f32) {
+    %8 = fir.array_coor %arg0(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.addf %arg4, %9 fastmath<contract> : f32
+    %11 = fir.array_coor %arg2(%0) %arg3 : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, index) -> !fir.ref<f32>
+    %12 = fir.load %11 : !fir.ref<f32>
+    %13 = arith.addf %arg5, %12 fastmath<contract> : f32
+    fir.result %10, %13 : f32, f32
+  }
+  %6 = arith.addf %5#0, %5#1 fastmath<contract> : f32
+  %7 = fir.convert %4#0 : (index) -> i32
+  fir.store %7 to %2 : !fir.ref<i32>
+  return %6 : f32
+}
+
+// CHECK-LABEL:   func.func @loop_with_result(
+// CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>>,
+// CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.array<100x100xf32>>,
+// CHECK-SAME:      %[[ARG2:.*]]: !fir.ref<!fir.array<100xf32>>) -> f32 {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 100 : index
+// CHECK:           %[[VAL_3:.*]] = fir.shape %[[VAL_2]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_4:.*]] = fir.shape %[[VAL_2]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = fir.alloca i32
+// CHECK:           %[[VAL_6:.*]] = fir.convert %[[ARG0]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:           %[[VAL_7:.*]] = affine.for %[[VAL_8:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_9:.*]] = %[[VAL_1]]) -> (f32) {
+// CHECK:             %[[VAL_10:.*]] = affine.apply #{{.*}}(%[[VAL_8]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:             %[[VAL_11:.*]] = affine.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
+// CHECK:             %[[VAL_12:.*]] = arith.addf %[[VAL_9]], %[[VAL_11]] fastmath<contract> : f32
+// CHECK:             affine.yield %[[VAL_12]] : f32
+// CHECK:           }
+// CHECK:           %[[VAL_13:.*]]:2 = fir.do_loop %[[VAL_14:.*]] = %[[VAL_0]] to %[[VAL_2]] step %[[VAL_0]] iter_args(%[[VAL_15:.*]] = %[[VAL_7]]) -> (index, f32) {
+// CHECK:             %[[VAL_16:.*]] = fir.array_coor %[[ARG1]](%[[VAL_4]]) %[[VAL_0]], %[[VAL_14]] : (!fir.ref<!fir.array<100x100xf32>>, !fir.shape<2>, index, index) -> !fir.ref<f32>
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<f32>) -> !fir.ref<!fir.array<100xf32>>
+// CHECK:             %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:             %[[VAL_19:.*]] = affine.for %[[VAL_20:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_21:.*]] = %[[VAL_15]]) -> (f32) {
+// CHECK:               %[[VAL_22:.*]] = affine.apply #{{.*}}(%[[VAL_20]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:               %[[VAL_23:.*]] = affine.load %[[VAL_18]]{{\[}}%[[VAL_22]]] : memref<?xf32>
+// CHECK:               %[[VAL_24:.*]] = arith.addf %[[VAL_21]], %[[VAL_23]] fastmath<contract> : f32
+// CHECK:               affine.yield %[[VAL_24]] : f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]] = arith.addi %[[VAL_14]], %[[VAL_0]] overflow<nsw> : index
+// CHECK:             fir.result %[[VAL_25]], %[[VAL_19]] : index, f32
+// CHECK:           }
+// CHECK:           %[[VAL_26:.*]] = fir.convert %[[ARG2]] : (!fir.ref<!fir.array<100xf32>>) -> memref<?xf32>
+// CHECK:           %[[VAL_27:.*]]:2 = affine.for %[[VAL_28:.*]] = %[[VAL_0]] to #{{.*}}(){{\[}}%[[VAL_2]]] iter_args(%[[VAL_29:.*]] = %[[VAL_30:.*]]#1, %[[VAL_31:.*]] = %[[VAL_1]]) -> (f32, f32) {
+// CHECK:             %[[VAL_32:.*]] = affine.apply #{{.*}}(%[[VAL_28]]){{\[}}%[[VAL_0]], %[[VAL_2]], %[[VAL_0]]]
+// CHECK:             %[[VAL_33:.*]] = affine.load %[[VAL_6]]{{\[}}%[[VAL_32]]] : memref<?xf32>
+// CHECK:             %[[VAL_34:.*]] = arith.addf %[[VAL_29]], %[[VAL_33]] fastmath<contract> : f32
+// CHECK:             %[[VAL_35:.*]] = affine.load %[[VAL_26]]{{\[}}%[[VAL_32]]] : memref<?xf32>
+// CHECK:             %[[VAL_36:.*]] = arith.addf %[[VAL_31]], %[[VAL_35]] fastmath<contract> : f32
+// CHECK:             affine.yield %[[VAL_34]], %[[VAL_36]] : f32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_37:.*]] = arith.addf %[[VAL_38:.*]]#0, %[[VAL_38]]#1 fastmath<contract> : f32
+// CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_40:.*]]#0 : (index) -> i32
+// CHECK:           fir.store %[[VAL_39]] to %[[VAL_5]] : !fir.ref<i32>
+// CHECK:           return %[[VAL_37]] : f32
+// CHECK:         }
@@ -135,6 +135,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
 
@@ -410,6 +410,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
 
@@ -242,6 +242,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin
 
@@ -416,6 +416,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.fenv.feupdateenv
 
     # math.h entrypoints
+    libc.src.math.acos
     libc.src.math.acosf
     libc.src.math.acoshf
     libc.src.math.asin