diff --git a/clang/lib/CodeGen/Targets/Sparc.cpp b/clang/lib/CodeGen/Targets/Sparc.cpp
index 5f3c15d106eb6..38dbebdec2429 100644
--- a/clang/lib/CodeGen/Targets/Sparc.cpp
+++ b/clang/lib/CodeGen/Targets/Sparc.cpp
@@ -8,6 +8,7 @@
 #include "ABIInfoImpl.h"
 #include "TargetInfo.h"
+#include <algorithm>
 
 using namespace clang;
 using namespace clang::CodeGen;
@@ -109,7 +110,8 @@ class SparcV9ABIInfo : public ABIInfo {
   SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
 
 private:
-  ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
+  ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit,
+                          unsigned &RegOffset) const;
   void computeInfo(CGFunctionInfo &FI) const override;
   RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
                    AggValueSlot Slot) const override;
@@ -222,128 +224,114 @@ class SparcV9ABIInfo : public ABIInfo {
 };
 } // end anonymous namespace
 
-ABIArgInfo
-SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
+ABIArgInfo SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit,
+                                        unsigned &RegOffset) const {
   if (Ty->isVoidType())
     return ABIArgInfo::getIgnore();
 
-  uint64_t Size = getContext().getTypeSize(Ty);
+  auto &Context = getContext();
+  auto &VMContext = getVMContext();
+
+  uint64_t Size = Context.getTypeSize(Ty);
+  unsigned Alignment = Context.getTypeAlign(Ty);
+  bool NeedPadding = (Alignment > 64) && (RegOffset % 2 != 0);
 
   // Anything too big to fit in registers is passed with an explicit indirect
   // pointer / sret pointer.
-  if (Size > SizeLimit)
+  if (Size > SizeLimit) {
+    RegOffset += 1;
     return getNaturalAlignIndirect(
         Ty, /*AddrSpace=*/getDataLayout().getAllocaAddrSpace(),
         /*ByVal=*/false);
+  }
 
   // Treat an enum type as its underlying type.
   if (const auto *ED = Ty->getAsEnumDecl())
     Ty = ED->getIntegerType();
 
   // Integer types smaller than a register are extended.
-  if (Size < 64 && Ty->isIntegerType())
+  if (Size < 64 && Ty->isIntegerType()) {
+    RegOffset += 1;
     return ABIArgInfo::getExtend(Ty);
+  }
 
   if (const auto *EIT = Ty->getAs<BitIntType>())
-    if (EIT->getNumBits() < 64)
+    if (EIT->getNumBits() < 64) {
+      RegOffset += 1;
       return ABIArgInfo::getExtend(Ty);
+    }
 
   // Other non-aggregates go in registers.
-  if (!isAggregateTypeForABI(Ty))
+  if (!isAggregateTypeForABI(Ty)) {
+    RegOffset += Size / 64;
     return ABIArgInfo::getDirect();
+  }
 
   // If a C++ object has either a non-trivial copy constructor or a non-trivial
   // destructor, it is passed with an explicit indirect pointer / sret pointer.
-  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
+  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
+    RegOffset += 1;
     return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
                                    RAA == CGCXXABI::RAA_DirectInMemory);
+  }
 
   // This is a small aggregate type that should be passed in registers.
   // Build a coercion type from the LLVM struct type.
   llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
-  if (!StrTy)
+  if (!StrTy) {
+    RegOffset += Size / 64;
     return ABIArgInfo::getDirect();
+  }
 
-  CoerceBuilder CB(getVMContext(), getDataLayout());
+  CoerceBuilder CB(VMContext, getDataLayout());
   CB.addStruct(0, StrTy);
   // All structs, even empty ones, should take up a register argument slot,
   // so pin the minimum struct size to one bit.
   CB.pad(llvm::alignTo(
       std::max(CB.DL.getTypeSizeInBits(StrTy).getKnownMinValue(), uint64_t(1)),
      64));
+  RegOffset += CB.Size / 64;
+
+  // If we're dealing with overaligned structs we may need to add padding in
+  // front, to preserve the correct register-memory mapping.
+  //
+  // See SCD 2.4.1, pages 3P-11 and 3P-12.
+  llvm::Type *Padding =
+      NeedPadding ? llvm::Type::getInt64Ty(VMContext) : nullptr;
+  RegOffset += NeedPadding ? 1 : 0;
 
   // Try to use the original type for coercion.
   llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
-  if (CB.InReg)
-    return ABIArgInfo::getDirectInReg(CoerceTy);
-  else
-    return ABIArgInfo::getDirect(CoerceTy);
+  ABIArgInfo AAI = ABIArgInfo::getDirect(CoerceTy, 0, Padding);
+  AAI.setInReg(CB.InReg);
+  return AAI;
 }
 
 RValue SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                  QualType Ty, AggValueSlot Slot) const {
-  ABIArgInfo AI = classifyType(Ty, 16 * 8);
-  llvm::Type *ArgTy = CGT.ConvertType(Ty);
-  if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
-    AI.setCoerceToType(ArgTy);
-
   CharUnits SlotSize = CharUnits::fromQuantity(8);
+  auto TInfo = getContext().getTypeInfoInChars(Ty);
 
-  CGBuilderTy &Builder = CGF.Builder;
-  Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
-                         getVAListElementType(CGF), SlotSize);
-  llvm::Type *ArgPtrTy = CGF.UnqualPtrTy;
-
-  auto TypeInfo = getContext().getTypeInfoInChars(Ty);
-
-  Address ArgAddr = Address::invalid();
-  CharUnits Stride;
-  switch (AI.getKind()) {
-  case ABIArgInfo::Expand:
-  case ABIArgInfo::CoerceAndExpand:
-  case ABIArgInfo::InAlloca:
-  case ABIArgInfo::TargetSpecific:
-    llvm_unreachable("Unsupported ABI kind for va_arg");
-
-  case ABIArgInfo::Extend: {
-    Stride = SlotSize;
-    CharUnits Offset = SlotSize - TypeInfo.Width;
-    ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend");
-    break;
-  }
-
-  case ABIArgInfo::Direct: {
-    auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
-    Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
-    ArgAddr = Addr;
-    break;
-  }
-
-  case ABIArgInfo::Indirect:
-  case ABIArgInfo::IndirectAliased:
-    Stride = SlotSize;
-    ArgAddr = Addr.withElementType(ArgPtrTy);
-    ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy,
-                      TypeInfo.Align);
-    break;
+  // Zero-sized types have a width of one byte for parameter passing purposes.
+  TInfo.Width = std::max(TInfo.Width, CharUnits::fromQuantity(1));
 
-  case ABIArgInfo::Ignore:
-    return Slot.asRValue();
-  }
-
-  // Update VAList.
-  Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
-  Builder.CreateStore(NextPtr.emitRawPointer(CGF), VAListAddr);
-
-  return CGF.EmitLoadOfAnyValue(
-      CGF.MakeAddrLValue(ArgAddr.withElementType(ArgTy), Ty), Slot);
+  // Arguments bigger than 2*SlotSize bytes are passed indirectly.
+  return emitVoidPtrVAArg(CGF, VAListAddr, Ty,
+                          /*IsIndirect=*/TInfo.Width > 2 * SlotSize, TInfo,
+                          SlotSize,
+                          /*AllowHigherAlign=*/true, Slot);
 }
 
 void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
+  unsigned RetOffset = 0;
+  ABIArgInfo RetType = classifyType(FI.getReturnType(), 32 * 8, RetOffset);
+  FI.getReturnInfo() = RetType;
+
+  // Indirect returns have their pointer passed as an argument.
+  unsigned ArgOffset = RetType.isIndirect() ? RetOffset : 0;
 
   for (auto &I : FI.arguments())
-    I.info = classifyType(I.type, 16 * 8);
+    I.info = classifyType(I.type, 16 * 8, ArgOffset);
 }
 
 namespace {
diff --git a/clang/test/CodeGen/sparcv9-abi.c b/clang/test/CodeGen/sparcv9-abi.c
index 5a3d64fd37889..d3f79fd7989fc 100644
--- a/clang/test/CodeGen/sparcv9-abi.c
+++ b/clang/test/CodeGen/sparcv9-abi.c
@@ -25,12 +25,35 @@ long double f_ld(long double x) { return x; }
 struct empty {};
 struct emptyarr { struct empty a[10]; };
 
+// In 16-byte structs, 16-byte aligned members are expanded
+// to their corresponding i128/f128 types.
+struct align16_int { _Alignas(16) int x; };
+struct align16_mixed { _Alignas(16) int x; double y; };
+struct align16_longdouble { long double x; };
+
 // CHECK-LABEL: define{{.*}} i64 @f_empty(i64 %x.coerce)
 struct empty f_empty(struct empty x) { return x; }
 
 // CHECK-LABEL: define{{.*}} i64 @f_emptyarr(i64 %x.coerce)
 struct empty f_emptyarr(struct emptyarr x) { return x.a[0]; }
 
+// CHECK-LABEL: define{{.*}} void @f_aligncaller(i64 %a.coerce0, i64 %a.coerce1)
+// CHECK-LABEL: declare{{.*}} void @f_aligncallee(i32 noundef signext, i64, i64, i64)
+void f_aligncallee(int pad, struct align16_int a);
+void f_aligncaller(struct align16_int a) {
+  f_aligncallee(0, a);
+}
+
+// CHECK-LABEL: define{{.*}} double @f_mixed_aligned(i64 noundef %a, i64 %0, i64 %b.coerce0, double %b.coerce1)
+double f_mixed_aligned(long a, struct align16_mixed b) {
+  return b.y;
+}
+
+// CHECK-LABEL: define{{.*}} fp128 @f_longdouble(i64 noundef %a, i64 %0, fp128 %b.coerce)
+long double f_longdouble(long a, struct align16_longdouble b) {
+  return b.x;
+}
+
 // CHECK-LABEL: define{{.*}} i64 @f_emptyvar(i32 noundef zeroext %count, ...)
 long f_emptyvar(unsigned count, ...) {
   long ret;
@@ -80,6 +103,11 @@ struct medium {
   int *c, *d;
 };
 
+struct medium_aligned {
+  _Alignas(16) int *a;
+  int *b, *c, *d;
+};
+
 // CHECK-LABEL: define{{.*}} %struct.medium @f_medium(ptr dead_on_return noundef %x)
 struct medium f_medium(struct medium x) {
   x.a += *x.b;
@@ -87,6 +115,13 @@ struct medium f_medium(struct medium x) {
   return x;
 }
 
+// CHECK-LABEL: define{{.*}} %struct.medium_aligned @f_medium_aligned(ptr dead_on_return noundef %x)
+struct medium_aligned f_medium_aligned(struct medium_aligned x) {
+  x.a += *x.b;
+  x.b = 0;
+  return x;
+}
+
 // Large structs are also returned indirectly.
 struct large {
   int *a, *b;
@@ -101,6 +136,15 @@ struct large f_large(struct large x) {
   return x;
 }
 
+// Large returns are converted into a pointer argument.
+// Such a conversion should preserve the alignment of overaligned arguments.
+// CHECK-LABEL: define{{.*}} void @f_largereturn_aligned(ptr dead_on_unwind noalias writable sret(%struct.large) align 8 %agg.result, i64 %0, i64 %x.coerce0, i64 %x.coerce1)
+struct large f_largereturn_aligned(struct align16_int x) {
+  struct large ret;
+  ret.x = x.x;
+  return ret;
+}
+
 // A 64-bit struct fits in a register.
 struct reg {
   int a, b;
@@ -215,6 +259,18 @@ int f_variable(char *f, ...)
 {
   case 'm':
     s += *va_arg(ap, struct medium).a;
     break;
+
+// CHECK: %[[CUR:[^ ]+]] = load ptr, ptr %ap
+// CHECK-DAG: %[[TMP:[^ ]+]] = getelementptr inbounds i8, ptr %[[CUR]], i32 15
+// CHECK-DAG: %[[ALIGNED:[^ ]+]] = call ptr @llvm.ptrmask.p0.i64(ptr %[[TMP]], i64 -16)
+// CHECK-DAG: %[[NXT:[^ ]+]] = getelementptr inbounds i8, ptr %[[ALIGNED]], i64 16
+// CHECK-DAG: store ptr %[[NXT]], ptr %ap
+// CHECK-DAG: call void @llvm.memcpy.p0.p0.i64(ptr align 16 {{.*}}, ptr align 16 %[[ALIGNED]], i64 16, i1 false)
+// CHECK: br
+  case 'a':
+    s += va_arg(ap, struct align16_int).x;
+    break;
   }
+
   va_end(ap);
   return s;
 }
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index d01218f573dc2..e5ed9d267afed 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -115,7 +115,8 @@ static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT,
 
   // Stack space is allocated for all arguments starting from [%fp+BIAS+128].
   unsigned size = (LocVT == MVT::f128) ? 16 : 8;
-  Align alignment = (LocVT == MVT::f128) ? Align(16) : Align(8);
+  Align alignment =
+      (LocVT == MVT::f128 || ArgFlags.isSplit()) ? Align(16) : Align(8);
   unsigned Offset = State.AllocateStack(size, alignment);
   unsigned Reg = 0;
 
diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll
index 6485a7f13e8d5..dc8c9af4a5185 100644
--- a/llvm/test/CodeGen/SPARC/64abi.ll
+++ b/llvm/test/CodeGen/SPARC/64abi.ll
@@ -473,8 +473,8 @@ declare i64 @receive_fp128(i64 %a, ...)
 ; HARD-DAG: ldx [%sp+[[Offset0]]], %o2
 ; HARD-DAG: ldx [%sp+[[Offset1]]], %o3
 ; SOFT-DAG: mov %i0, %o0
-; SOFT-DAG: mov %i1, %o1
 ; SOFT-DAG: mov %i2, %o2
+; SOFT-DAG: mov %i3, %o3
 ; CHECK: call receive_fp128
 define i64 @test_fp128_variable_args(i64 %a, fp128 %b) {
 entry:
@@ -482,6 +482,19 @@ entry:
   ret i64 %0
 }
 
+declare i64 @receive_i128(i64 %a, i128 %b)
+
+; CHECK-LABEL: test_i128_args:
+; CHECK: mov %i3, %o3
+; CHECK: mov %i2, %o2
+; CHECK: mov %i0, %o0
+; CHECK: call receive_i128
+define i64 @test_i128_args(i64 %a, i128 %b) {
+entry:
+  %0 = call i64 @receive_i128(i64 %a, i128 %b)
+  ret i64 %0
+}
+
 ; CHECK-LABEL: test_call_libfunc:
 ; HARD: st %f1, [%fp+[[Offset0:[0-9]+]]]
 ; HARD: fmovs %f3, %f1
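
A note on the register accounting that the classifyType changes implement: per SCD 2.4.1 (pages 3P-11 and 3P-12), a composite whose alignment exceeds 8 bytes must begin at an even-numbered argument word so its register image matches its in-memory layout. RegOffset counts the argument words consumed so far, and NeedPadding prepends one i64 of padding whenever an overaligned aggregate would otherwise land at an odd word. A minimal C sketch of the resulting convention; the register assignments in the comments are illustrative, derived from the f_aligncallee test above rather than actual compiler output:

    /* Sketch of the SCD 2.4.1 even-word rule; register comments mirror
       the f_aligncallee CHECK lines above. */
    struct align16_int { _Alignas(16) int x; };

    void f_aligncallee(int pad, struct align16_int a);

    void f_aligncaller(struct align16_int a) {
      /* pad       -> %o0 (argument word 0)
       * (padding) -> %o1 (word 1 is skipped: `a` is 16-byte aligned,
       *                   so it must begin at an even word)
       * a         -> %o2/%o3 (argument words 2 and 3) */
      f_aligncallee(0, a);
    }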
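The rewritten EmitVAArg delegates to the generic emitVoidPtrVAArg helper with AllowHigherAlign=true, so reading an overaligned struct from a va_list first rounds the cursor up to the type's alignment; that is what the llvm.ptrmask CHECK lines in sparcv9-abi.c verify. A hedged C sketch of the runtime effect (the helper name is hypothetical, and memcpy stands in for the IR-level memcpy the compiler emits):

    #include <stdint.h>
    #include <string.h>

    struct align16_int { _Alignas(16) int x; };

    /* Illustrative equivalent of va_arg(ap, struct align16_int) on sparcv9:
       round the cursor up to 16 bytes, copy the two 8-byte argument slots,
       then advance the cursor past them. */
    static struct align16_int va_next_align16(char **cursor) {
      char *aligned = (char *)(((uintptr_t)*cursor + 15) & ~(uintptr_t)15);
      struct align16_int v;
      memcpy(&v, aligned, sizeof v); /* the "ptr align 16" memcpy in the IR */
      *cursor = aligned + 16;        /* corresponds to the store back to %ap */
      return v;
    }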
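On the backend side, the Analyze_CC_Sparc64_Full change allocates a 16-byte-aligned slot for any split argument rather than only for f128, which is what keeps the two i64 halves of an i128 in an even/odd register pair. Expressed at the C level, assuming a toolchain where __int128 lowers to a split i64 pair on sparcv9 (the caller name is hypothetical; register comments follow the test_i128_args checks above):

    extern long receive_i128(long a, __int128 b);

    long call_i128(long a, __int128 b) {
      /* a -> %o0; %o1 is skipped; b -> %o2/%o3, matching the
         mov %i2, %o2 / mov %i3, %o3 checks in 64abi.ll above. */
      return receive_i128(a, b);
    }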