diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h index 75092a1fd2bfc..68106f3462c9b 100644 --- a/flang-rt/include/flang-rt/runtime/descriptor.h +++ b/flang-rt/include/flang-rt/runtime/descriptor.h @@ -20,13 +20,16 @@ #include "memory.h" #include "type-code.h" +#include "flang-rt/runtime/allocator-registry.h" #include "flang/Common/ISO_Fortran_binding_wrapper.h" +#include "flang/Common/optional.h" #include "flang/Runtime/descriptor-consts.h" #include #include #include #include #include +#include #include /// Value used for asyncObject when no specific stream is specified. @@ -262,9 +265,20 @@ class Descriptor { template RT_API_ATTRS A *ZeroBasedIndexedElement(std::size_t n) const { - SubscriptValue at[maxRank]; - if (SubscriptsForZeroBasedElementNumber(at, n)) { - return Element(at); + if (raw_.rank == 0) { + if (n == 0) { + return OffsetElement(); + } + } else if (raw_.rank == 1) { + const auto &dim{GetDimension(0)}; + if (n < static_cast(dim.Extent())) { + return OffsetElement(n * dim.ByteStride()); + } + } else { + SubscriptValue at[maxRank]; + if (SubscriptsForZeroBasedElementNumber(at, n)) { + return Element(at); + } } return nullptr; } @@ -366,6 +380,18 @@ class Descriptor { RT_API_ATTRS std::size_t SizeInBytes() const; RT_API_ATTRS std::size_t Elements() const; + RT_API_ATTRS std::size_t InlineElements() const { + int n{rank()}; + if (n == 0) { + return 1; + } else { + auto elements{static_cast(GetDimension(0).Extent())}; + for (int j{1}; j < n; ++j) { + elements *= GetDimension(j).Extent(); + } + return elements; + } + } // Allocate() assumes Elements() and ElementBytes() work; // define the extents of the dimensions and the element length @@ -377,7 +403,22 @@ class Descriptor { // Deallocates storage; does not call FINAL subroutines or // deallocate allocatable/automatic components. 
- RT_API_ATTRS int Deallocate(); + RT_API_ATTRS int Deallocate() { + ISO::CFI_cdesc_t &descriptor{raw()}; + void *pointer{descriptor.base_addr}; + if (!pointer) { + return CFI_ERROR_BASE_ADDR_NULL; + } else { + int allocIndex{MapAllocIdx()}; + if (allocIndex == kDefaultAllocator) { + std::free(pointer); + } else { + allocatorRegistry.GetDeallocator(allocIndex)(pointer); + } + descriptor.base_addr = nullptr; + return CFI_SUCCESS; + } + } // Deallocates storage, including allocatable and automatic // components. Optionally invokes FINAL subroutines. @@ -392,8 +433,7 @@ class Descriptor { bool stridesAreContiguous{true}; for (int j{0}; j < leadingDimensions; ++j) { const Dimension &dim{GetDimension(j)}; - stridesAreContiguous &= - (bytes == dim.ByteStride()) || (dim.Extent() == 1); + stridesAreContiguous &= bytes == dim.ByteStride() || dim.Extent() == 1; bytes *= dim.Extent(); } // One and zero element arrays are contiguous even if the descriptor @@ -406,6 +446,32 @@ class Descriptor { return stridesAreContiguous || bytes == 0; } + // The result, if any, is a fixed stride value that can be used to + // address all elements. It generalizes contiguity by also allowing + // the case of an array with extent 1 on all but one dimension. + RT_API_ATTRS common::optional FixedStride() const { + auto rank{static_cast(raw_.rank)}; + common::optional stride; + for (std::size_t j{0}; j < rank; ++j) { + const Dimension &dim{GetDimension(j)}; + auto extent{dim.Extent()}; + if (extent == 0) { + break; // empty array + } else if (extent == 1) { // ok + } else if (stride) { + // Extent > 1 on multiple dimensions + if (IsContiguous()) { + return ElementBytes(); + } else { + return common::nullopt; + } + } else { + stride = dim.ByteStride(); + } + } + return stride.value_or(0); // 0 for scalars and empty arrays + } + // Establishes a pointer to a section or element. 
RT_API_ATTRS bool EstablishPointerSection(const Descriptor &source, const SubscriptValue *lower = nullptr, @@ -427,6 +493,14 @@ class Descriptor { RT_API_ATTRS inline int GetAllocIdx() const { return (raw_.extra & _CFI_ALLOCATOR_IDX_MASK) >> _CFI_ALLOCATOR_IDX_SHIFT; } + RT_API_ATTRS int MapAllocIdx() const { +#ifdef RT_DEVICE_COMPILATION + // Force default allocator in device code. + return kDefaultAllocator; +#else + return GetAllocIdx(); +#endif + } RT_API_ATTRS inline void SetAllocIdx(int pos) { raw_.extra &= ~_CFI_ALLOCATOR_IDX_MASK; // Clear the allocator index bits. raw_.extra |= pos << _CFI_ALLOCATOR_IDX_SHIFT; diff --git a/flang-rt/include/flang-rt/runtime/type-info.h b/flang-rt/include/flang-rt/runtime/type-info.h index 80301a313282f..a8d39f4f8a1a3 100644 --- a/flang-rt/include/flang-rt/runtime/type-info.h +++ b/flang-rt/include/flang-rt/runtime/type-info.h @@ -68,7 +68,9 @@ class Component { RT_API_ATTRS std::uint64_t offset() const { return offset_; } RT_API_ATTRS const Value &characterLen() const { return characterLen_; } RT_API_ATTRS const DerivedType *derivedType() const { - return derivedType_.descriptor().OffsetElement(); + return category() == TypeCategory::Derived + ? 
derivedType_.descriptor().OffsetElement() + : nullptr; } RT_API_ATTRS const Value *lenValue() const { return lenValue_.descriptor().OffsetElement(); diff --git a/flang-rt/include/flang-rt/runtime/work-queue.h b/flang-rt/include/flang-rt/runtime/work-queue.h index 0daa7bc4d3384..77254787c44aa 100644 --- a/flang-rt/include/flang-rt/runtime/work-queue.h +++ b/flang-rt/include/flang-rt/runtime/work-queue.h @@ -62,6 +62,7 @@ #include "flang-rt/runtime/stat.h" #include "flang-rt/runtime/type-info.h" #include "flang/Common/api-attrs.h" +#include "flang/Common/optional.h" #include "flang/Runtime/freestanding-tools.h" #include @@ -122,7 +123,7 @@ class Elementwise { protected: const Descriptor &instance_, *from_{nullptr}; - std::size_t elements_{instance_.Elements()}; + std::size_t elements_{instance_.InlineElements()}; std::size_t elementAt_{0}; SubscriptValue subscripts_[common::maxRank]; SubscriptValue fromSubscripts_[common::maxRank]; @@ -131,11 +132,19 @@ class Elementwise { // Base class for ticket workers that operate over derived type components. 
class Componentwise { public: - RT_API_ATTRS Componentwise(const typeInfo::DerivedType &); + RT_API_ATTRS Componentwise(const typeInfo::DerivedType &derived) + : derived_{derived}, components_{derived_.component().InlineElements()} { + GetFirstComponent(); + } + RT_API_ATTRS bool IsComplete() const { return componentAt_ >= components_; } RT_API_ATTRS void Advance() { ++componentAt_; - GetComponent(); + if (IsComplete()) { + component_ = nullptr; + } else { + ++component_; + } } RT_API_ATTRS void SkipToEnd() { component_ = nullptr; @@ -144,15 +153,21 @@ class Componentwise { RT_API_ATTRS void Reset() { component_ = nullptr; componentAt_ = 0; - GetComponent(); + GetFirstComponent(); } - RT_API_ATTRS void GetComponent(); protected: const typeInfo::DerivedType &derived_; std::size_t components_{0}, componentAt_{0}; const typeInfo::Component *component_{nullptr}; StaticDescriptor componentDescriptor_; + +private: + RT_API_ATTRS void GetFirstComponent() { + if (components_ > 0) { + component_ = derived_.component().OffsetElement(); + } + } }; // Base class for ticket workers that operate over derived type components @@ -228,14 +243,14 @@ class ElementsOverComponents : public Elementwise, public Componentwise { // Ticket worker classes -// Implements derived type instance initialization +// Implements derived type instance initialization. 
class InitializeTicket : public ImmediateTicketRunner, - private ComponentsOverElements { + private ElementsOverComponents { public: RT_API_ATTRS InitializeTicket( const Descriptor &instance, const typeInfo::DerivedType &derived) : ImmediateTicketRunner{*this}, - ComponentsOverElements{instance, derived} {} + ElementsOverComponents{instance, derived} {} RT_API_ATTRS int Begin(WorkQueue &); RT_API_ATTRS int Continue(WorkQueue &); }; @@ -283,12 +298,14 @@ class DestroyTicket : public ImmediateTicketRunner, RT_API_ATTRS DestroyTicket(const Descriptor &instance, const typeInfo::DerivedType &derived, bool finalize) : ImmediateTicketRunner{*this}, - ComponentsOverElements{instance, derived}, finalize_{finalize} {} + ComponentsOverElements{instance, derived}, finalize_{finalize}, + fixedStride_{instance.FixedStride()} {} RT_API_ATTRS int Begin(WorkQueue &); RT_API_ATTRS int Continue(WorkQueue &); private: bool finalize_{false}; + common::optional fixedStride_; }; // Implements general intrinsic assignment @@ -302,11 +319,11 @@ class AssignTicket : public ImmediateTicketRunner { RT_API_ATTRS int Continue(WorkQueue &); private: + RT_API_ATTRS Descriptor &GetTempDescriptor(); RT_API_ATTRS bool IsSimpleMemmove() const { return !toDerived_ && to_.rank() == from_->rank() && to_.IsContiguous() && from_->IsContiguous() && to_.ElementBytes() == from_->ElementBytes(); } - RT_API_ATTRS Descriptor &GetTempDescriptor(); Descriptor &to_; const Descriptor *from_{nullptr}; @@ -549,6 +566,7 @@ class WorkQueue { TicketList *first_{nullptr}, *last_{nullptr}, *insertAfter_{nullptr}; TicketList static_[numStatic_]; TicketList *firstFree_{static_}; + bool anyDynamicAllocation_{false}; }; } // namespace Fortran::runtime diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index f936a4192a33c..737e9cc930781 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -217,8 +217,8 @@ static RT_API_ATTRS void DoElementalDefinedAssignment(const 
Descriptor &to, toElementDesc.Establish(derived, nullptr, 0, nullptr, CFI_attribute_pointer); fromElementDesc.Establish( derived, nullptr, 0, nullptr, CFI_attribute_pointer); - for (std::size_t toElements{to.Elements()}; toElements-- > 0; - to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) { + for (std::size_t toElements{to.InlineElements()}; toElements-- > 0; + to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) { toElementDesc.set_base_addr(to.Element(toAt)); fromElementDesc.set_base_addr(from.Element(fromAt)); DoScalarDefinedAssignment(toElementDesc, fromElementDesc, derived, special); @@ -431,11 +431,14 @@ RT_API_ATTRS int AssignTicket::Continue(WorkQueue &workQueue) { } } // Intrinsic assignment - std::size_t toElements{to_.Elements()}; - if (from_->rank() > 0 && toElements != from_->Elements()) { - workQueue.terminator().Crash("Assign: mismatching element counts in array " - "assignment (to %zd, from %zd)", - toElements, from_->Elements()); + std::size_t toElements{to_.InlineElements()}; + if (from_->rank() > 0) { + std::size_t fromElements{from_->InlineElements()}; + if (toElements != fromElements) { + workQueue.terminator().Crash("Assign: mismatching element counts in " + "array assignment (to %zd, from %zd)", + toElements, fromElements); + } } if (to_.type() != from_->type()) { workQueue.terminator().Crash( @@ -529,7 +532,7 @@ RT_API_ATTRS int DerivedAssignTicket::Begin( // allocatable components or defined ASSIGNMENT(=) at any level. 
memmoveFct_(this->instance_.template OffsetElement(), this->from_->template OffsetElement(), - this->instance_.Elements() * elementBytes); + this->instance_.InlineElements() * elementBytes); return StatOk; } } @@ -544,7 +547,7 @@ RT_API_ATTRS int DerivedAssignTicket::Begin( // Copy procedure pointer components const Descriptor &procPtrDesc{this->derived_.procPtr()}; bool noDataComponents{this->IsComplete()}; - if (std::size_t numProcPtrs{procPtrDesc.Elements()}) { + if (std::size_t numProcPtrs{procPtrDesc.InlineElements()}) { for (std::size_t k{0}; k < numProcPtrs; ++k) { const auto &procPtr{ *procPtrDesc.ZeroBasedIndexedElement(k)}; @@ -614,7 +617,7 @@ RT_API_ATTRS int DerivedAssignTicket::Continue( memmoveFct_(to, from, componentByteSize); } } - this->Componentwise::Advance(); + this->SkipToNextComponent(); } else { memmoveFct_( this->instance_.template Element(this->subscripts_) + @@ -646,7 +649,7 @@ RT_API_ATTRS int DerivedAssignTicket::Continue( memmoveFct_(to, from, componentByteSize); } } - this->Componentwise::Advance(); + this->SkipToNextComponent(); } else { memmoveFct_(this->instance_.template Element(this->subscripts_) + this->component_->offset(), @@ -668,11 +671,11 @@ RT_API_ATTRS int DerivedAssignTicket::Continue( if (toDesc->IsAllocatable() && !fromDesc->IsAllocated()) { if (toDesc->IsAllocated()) { if (this->phase_ == 0) { - this->phase_++; if (componentDerived && !componentDerived->noDestructionNeeded()) { if (int status{workQueue.BeginDestroy( *toDesc, *componentDerived, /*finalize=*/false)}; status != StatOk) { + this->phase_++; return status; } } @@ -725,15 +728,15 @@ RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc, SubscriptValue allocAt[maxRank]; alloc.GetLowerBounds(allocAt); if (allocDerived) { - for (std::size_t n{alloc.Elements()}; n-- > 0; - alloc.IncrementSubscripts(allocAt)) { + for (std::size_t n{alloc.InlineElements()}; n-- > 0; + alloc.IncrementSubscripts(allocAt)) { Descriptor allocElement{*Descriptor::Create(*allocDerived, 
reinterpret_cast(alloc.Element(allocAt)), 0)}; Assign(allocElement, source, terminator, NoAssignFlags, memmoveFct); } } else { // intrinsic type - for (std::size_t n{alloc.Elements()}; n-- > 0; - alloc.IncrementSubscripts(allocAt)) { + for (std::size_t n{alloc.InlineElements()}; n-- > 0; + alloc.IncrementSubscripts(allocAt)) { memmoveFct(alloc.Element(allocAt), source.raw().base_addr, alloc.ElementBytes()); } diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp index 4e36b1e2edfc8..bb9a68abef2a7 100644 --- a/flang-rt/lib/runtime/derived.cpp +++ b/flang-rt/lib/runtime/derived.cpp @@ -39,64 +39,46 @@ RT_API_ATTRS int Initialize(const Descriptor &instance, } RT_API_ATTRS int InitializeTicket::Begin(WorkQueue &) { - // Initialize procedure pointer components in each element - const Descriptor &procPtrDesc{derived_.procPtr()}; - if (std::size_t numProcPtrs{procPtrDesc.Elements()}) { - for (std::size_t k{0}; k < numProcPtrs; ++k) { - const auto &comp{ - *procPtrDesc.ZeroBasedIndexedElement(k)}; - // Loop only over elements - if (k > 0) { - Elementwise::Reset(); - } - for (; !Elementwise::IsComplete(); Elementwise::Advance()) { - auto &pptr{*instance_.ElementComponent( - subscripts_, comp.offset)}; - pptr = comp.procInitialization; - } - } - if (IsComplete()) { - return StatOk; + if (elements_ == 0) { + return StatOk; + } else { + // Initialize procedure pointer components in the first element, + // whence they will be copied later into all others. 
+ const Descriptor &procPtrDesc{derived_.procPtr()}; + std::size_t numProcPtrs{procPtrDesc.InlineElements()}; + char *raw{instance_.OffsetElement()}; + const auto *ppComponent{ + procPtrDesc.OffsetElement()}; + for (std::size_t k{0}; k < numProcPtrs; ++k, ++ppComponent) { + auto &pptr{*reinterpret_cast( + raw + ppComponent->offset)}; + pptr = ppComponent->procInitialization; } - Elementwise::Reset(); + return StatContinue; } - return StatContinue; } RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) { - while (!IsComplete()) { + // Initialize the data components of the first element. + char *rawInstance{instance_.OffsetElement()}; + for (; !Componentwise::IsComplete(); SkipToNextComponent()) { + char *rawComponent{rawInstance + component_->offset()}; if (component_->genre() == typeInfo::Component::Genre::Allocatable) { - // Establish allocatable descriptors - for (; !Elementwise::IsComplete(); Elementwise::Advance()) { - Descriptor &allocDesc{*instance_.ElementComponent( - subscripts_, component_->offset())}; - component_->EstablishDescriptor( - allocDesc, instance_, workQueue.terminator()); - allocDesc.raw().attribute = CFI_attribute_allocatable; - } - SkipToNextComponent(); + Descriptor &allocDesc{*reinterpret_cast(rawComponent)}; + component_->EstablishDescriptor( + allocDesc, instance_, workQueue.terminator()); } else if (const void *init{component_->initialization()}) { // Explicit initialization of data pointers and // non-allocatable non-automatic components std::size_t bytes{component_->SizeInBytes(instance_)}; - for (; !Elementwise::IsComplete(); Elementwise::Advance()) { - char *ptr{instance_.ElementComponent( - subscripts_, component_->offset())}; - std::memcpy(ptr, init, bytes); - } - SkipToNextComponent(); + std::memcpy(rawComponent, init, bytes); } else if (component_->genre() == typeInfo::Component::Genre::Pointer) { // Data pointers without explicit initialization are established // so that they are valid right-hand side targets of 
pointer // assignment statements. - for (; !Elementwise::IsComplete(); Elementwise::Advance()) { - Descriptor &ptrDesc{*instance_.ElementComponent( - subscripts_, component_->offset())}; - component_->EstablishDescriptor( - ptrDesc, instance_, workQueue.terminator()); - ptrDesc.raw().attribute = CFI_attribute_pointer; - } - SkipToNextComponent(); + Descriptor &ptrDesc{*reinterpret_cast(rawComponent)}; + component_->EstablishDescriptor( + ptrDesc, instance_, workQueue.terminator()); } else if (component_->genre() == typeInfo::Component::Genre::Data && component_->derivedType() && !component_->derivedType()->noInitializationNeeded()) { @@ -106,16 +88,41 @@ RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) { GetComponentExtents(extents, *component_, instance_); Descriptor &compDesc{componentDescriptor_.descriptor()}; const typeInfo::DerivedType &compType{*component_->derivedType()}; - compDesc.Establish(compType, - instance_.ElementComponent(subscripts_, component_->offset()), - component_->rank(), extents); - Advance(); + compDesc.Establish(compType, rawComponent, component_->rank(), extents); if (int status{workQueue.BeginInitialize(compDesc, compType)}; status != StatOk) { + SkipToNextComponent(); return status; } - } else { - SkipToNextComponent(); + } + } + // The first element is now complete. Copy it into the others. 
+ if (elements_ < 2) { + } else { + auto elementBytes{static_cast(instance_.ElementBytes())}; + if (auto stride{instance_.FixedStride()}) { + if (*stride == elementBytes) { // contiguous + for (std::size_t done{1}; done < elements_;) { + std::size_t chunk{elements_ - done}; + if (chunk > done) { + chunk = done; + } + char *uninitialized{rawInstance + done * *stride}; + std::memcpy(uninitialized, rawInstance, chunk * *stride); + done += chunk; + } + } else { + for (std::size_t done{1}; done < elements_; ++done) { + char *uninitialized{rawInstance + done * *stride}; + std::memcpy(uninitialized, rawInstance, elementBytes); + } + } + } else { // one at a time with subscription + for (Elementwise::Advance(); !Elementwise::IsComplete(); + Elementwise::Advance()) { + char *element{instance_.Element(subscripts_)}; + std::memcpy(element, rawInstance, elementBytes); + } } } return StatOk; @@ -237,7 +244,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor, const typeInfo::DerivedType &derived, Terminator &terminator) { if (const auto *special{FindFinal(derived, descriptor.rank())}) { if (special->which() == typeInfo::SpecialBinding::Which::ElementalFinal) { - std::size_t elements{descriptor.Elements()}; + std::size_t elements{descriptor.InlineElements()}; SubscriptValue at[maxRank]; descriptor.GetLowerBounds(at); if (special->IsArgDescriptor(0)) { @@ -415,24 +422,33 @@ RT_API_ATTRS int DestroyTicket::Continue(WorkQueue &workQueue) { // Contrary to finalization, the order of deallocation does not matter. 
while (!IsComplete()) { const auto *componentDerived{component_->derivedType()}; - if (component_->genre() == typeInfo::Component::Genre::Allocatable || - component_->genre() == typeInfo::Component::Genre::Automatic) { - Descriptor *d{instance_.ElementComponent( - subscripts_, component_->offset())}; - if (d->IsAllocated()) { - if (phase_ == 0) { - ++phase_; - if (componentDerived && !componentDerived->noDestructionNeeded()) { + if (component_->genre() == typeInfo::Component::Genre::Allocatable) { + if (fixedStride_ && + (!componentDerived || componentDerived->noDestructionNeeded())) { + // common fast path, just deallocate in every element + char *p{instance_.OffsetElement(component_->offset())}; + for (std::size_t j{0}; j < elements_; ++j, p += *fixedStride_) { + Descriptor &d{*reinterpret_cast(p)}; + d.Deallocate(); + } + SkipToNextComponent(); + } else { + Descriptor &d{*instance_.ElementComponent( + subscripts_, component_->offset())}; + if (d.IsAllocated()) { + if (componentDerived && !componentDerived->noDestructionNeeded() && + phase_ == 0) { if (int status{workQueue.BeginDestroy( - *d, *componentDerived, /*finalize=*/false)}; + d, *componentDerived, /*finalize=*/false)}; status != StatOk) { + ++phase_; return status; } } + d.Deallocate(); } - d->Deallocate(); + Advance(); } - Advance(); } else if (component_->genre() == typeInfo::Component::Genre::Data) { if (!componentDerived || componentDerived->noDestructionNeeded()) { SkipToNextComponent(); diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 67336d01380e0..b723acdd27bd5 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -85,12 +85,19 @@ RT_API_ATTRS void Descriptor::Establish(int characterKind, RT_API_ATTRS void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute) { - Establish(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, 
rank, - extent, attribute, true); - DescriptorAddendum *a{Addendum()}; - Terminator terminator{__FILE__, __LINE__}; - RUNTIME_CHECK(terminator, a != nullptr); - new (a) DescriptorAddendum{&dt}; + std::size_t elementBytes{dt.sizeInBytes()}; + ISO::EstablishDescriptor( + &raw_, p, attribute, CFI_type_struct, elementBytes, rank, extent); + if (elementBytes == 0) { + raw_.elem_len = 0; + // Reset byte strides of the dimensions, since EstablishDescriptor() + // only does that when the base address is not nullptr. + for (int j{0}; j < rank; ++j) { + GetDimension(j).SetByteStride(0); + } + } + SetHasAddendum(); + new (Addendum()) DescriptorAddendum{&dt}; } RT_API_ATTRS OwningPtr Descriptor::Create(TypeCode t, @@ -141,21 +148,7 @@ RT_API_ATTRS std::size_t Descriptor::SizeInBytes() const { } RT_API_ATTRS std::size_t Descriptor::Elements() const { - int n{rank()}; - std::size_t elements{1}; - for (int j{0}; j < n; ++j) { - elements *= GetDimension(j).Extent(); - } - return elements; -} - -RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) { -#ifdef RT_DEVICE_COMPILATION - // Force default allocator in device code. - return kDefaultAllocator; -#else - return desc.GetAllocIdx(); -#endif + return InlineElements(); } RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) { @@ -166,7 +159,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) { elementBytes = raw_.elem_len = 0; } std::size_t byteSize{Elements() * elementBytes}; - AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx(*this))}; + AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx())}; // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. 
@@ -207,18 +200,6 @@ RT_API_ATTRS int Descriptor::Destroy( } } -RT_API_ATTRS int Descriptor::Deallocate() { - ISO::CFI_cdesc_t &descriptor{raw()}; - if (!descriptor.base_addr) { - return CFI_ERROR_BASE_ADDR_NULL; - } else { - FreeFct free{allocatorRegistry.GetDeallocator(MapAllocIdx(*this))}; - free(descriptor.base_addr); - descriptor.base_addr = nullptr; - return CFI_SUCCESS; - } -} - RT_API_ATTRS bool Descriptor::DecrementSubscripts( SubscriptValue *subscript, const int *permutation) const { for (int j{raw_.rank - 1}; j >= 0; --j) { diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp index 7331f7bbc3a75..04487abd3272e 100644 --- a/flang-rt/lib/runtime/pointer.cpp +++ b/flang-rt/lib/runtime/pointer.cpp @@ -115,10 +115,12 @@ void RTDEF(PointerAssociateRemapping)(Descriptor &pointer, byteStride *= dim.Extent(); } } - if (pointer.Elements() > target.Elements()) { + std::size_t pointerElements{pointer.Elements()}; + std::size_t targetElements{target.Elements()}; + if (pointerElements > targetElements) { terminator.Crash("PointerAssociateRemapping: too many elements in remapped " "pointer (%zd > %zd)", - pointer.Elements(), target.Elements()); + pointerElements, targetElements); } } diff --git a/flang-rt/lib/runtime/work-queue.cpp b/flang-rt/lib/runtime/work-queue.cpp index a508ecb637102..42dbc9064b03b 100644 --- a/flang-rt/lib/runtime/work-queue.cpp +++ b/flang-rt/lib/runtime/work-queue.cpp @@ -21,21 +21,6 @@ static constexpr bool enableDebugOutput{false}; RT_OFFLOAD_API_GROUP_BEGIN -RT_API_ATTRS Componentwise::Componentwise(const typeInfo::DerivedType &derived) - : derived_{derived}, components_{derived_.component().Elements()} { - GetComponent(); -} - -RT_API_ATTRS void Componentwise::GetComponent() { - if (IsComplete()) { - component_ = nullptr; - } else { - const Descriptor &componentDesc{derived_.component()}; - component_ = componentDesc.ZeroBasedIndexedElement( - componentAt_); - } -} - RT_API_ATTRS int Ticket::Continue(WorkQueue 
&workQueue) { if (!begun) { begun = true; @@ -53,19 +38,21 @@ RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) { } RT_API_ATTRS WorkQueue::~WorkQueue() { - if (last_) { - if ((last_->next = firstFree_)) { - last_->next->previous = last_; + if (anyDynamicAllocation_) { + if (last_) { + if ((last_->next = firstFree_)) { + last_->next->previous = last_; + } + firstFree_ = first_; + first_ = last_ = nullptr; } - firstFree_ = first_; - first_ = last_ = nullptr; - } - while (firstFree_) { - TicketList *next{firstFree_->next}; - if (!firstFree_->isStatic) { - FreeMemory(firstFree_); + while (firstFree_) { + TicketList *next{firstFree_->next}; + if (!firstFree_->isStatic) { + FreeMemory(firstFree_); + } + firstFree_ = next; } - firstFree_ = next; } } @@ -74,6 +61,7 @@ RT_API_ATTRS Ticket &WorkQueue::StartTicket() { void *p{AllocateMemoryOrCrash(terminator_, sizeof(TicketList))}; firstFree_ = new (p) TicketList; firstFree_->isStatic = false; + anyDynamicAllocation_ = true; } TicketList *newTicket{firstFree_}; if ((firstFree_ = newTicket->next)) {