@@ -588,10 +588,6 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
588588 " Elements must not have destructors (they won't be called)." );
589589
590590private:
591- // / The type of the elements of the indices array. TODO: use one or two byte
592- // / indices for smaller tables to save more memory.
593- using Index = unsigned ;
594-
595591 // / The reciprocal of the load factor at which we expand the table. A value of
596592 // / 4 means that we resize at 1/4 = 75% load factor.
597593 static const size_t ResizeProportion = 4 ;
@@ -619,20 +615,77 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
619615 // / is stored inline. We work around this contradiction by considering the
620616 // / first index to always be occupied with a value that never matches any key.
621617 struct IndexStorage {
622- std::atomic<Index> Mask;
618+ // Index size is variable based on capacity, either 8, 16, or 32 bits.
619+ //
620+ // This is somewhat conservative. We could have, for example, a capacity of
621+ // 512 but a maximum index of only 200, which would still allow for 8-bit
622+ // indices. However, taking advantage of this would require reallocating
623+ // the index storage when the element count crossed a threshold, which is
624+ // more complex, and the advantages are minimal. This keeps it simple.
625+ //
626+ // The first byte of the storage is the log 2 of the capacity. The remaining
627+ // storage is then an array of 8, 16, or 32 bit integers, depending on the
628+ // capacity number. This union allows us to access the capacity, and then
629+ // access the rest of the storage by taking the address of one of the
630+ // IndexZero members and indexing into it (always avoiding index 0).
631+ union {
632+ uint8_t CapacityLog2;
633+ std::atomic<uint8_t > IndexZero8;
634+ std::atomic<uint16_t > IndexZero16;
635+ std::atomic<uint32_t > IndexZero32;
636+ };
637+
638+ // Get the size, in bytes, of the index needed for the given capacity.
639+ static unsigned indexSize (uint8_t capacityLog2) {
640+ if (capacityLog2 <= sizeof (uint8_t ) * CHAR_BIT)
641+ return sizeof (uint8_t );
642+ if (capacityLog2 <= sizeof (uint16_t ) * CHAR_BIT)
643+ return sizeof (uint16_t );
644+ return sizeof (uint32_t );
645+ }
646+
647+ unsigned indexSize () { return indexSize (CapacityLog2); }
623648
624- static IndexStorage *allocate (size_t capacity ) {
625- assert ((capacity & (capacity - 1 )) == 0 &&
626- " Capacity must be a power of 2 " ) ;
627- auto *ptr =
628- reinterpret_cast <IndexStorage *>( calloc (capacity, sizeof (Mask )));
649+ static IndexStorage *allocate (size_t capacityLog2 ) {
650+ assert (capacityLog2 > 0 );
651+ size_t capacity = 1UL << capacityLog2 ;
652+ auto *ptr = reinterpret_cast <IndexStorage *>(
653+ calloc (capacity, indexSize (capacityLog2 )));
629654 if (!ptr)
630655 swift::crash (" Could not allocate memory." );
631- ptr->Mask . store (capacity - 1 , std::memory_order_relaxed) ;
656+ ptr->CapacityLog2 = capacityLog2 ;
632657 return ptr;
633658 }
634659
635- std::atomic<Index> &at (size_t i) { return (&Mask)[i]; }
660+ unsigned loadIndexAt (size_t i, std::memory_order order) {
661+ assert (i > 0 && " index zero is off-limits, used to store capacity" );
662+
663+ switch (indexSize ()) {
664+ case sizeof (uint8_t ):
665+ return (&IndexZero8)[i].load (order);
666+ case sizeof (uint16_t ):
667+ return (&IndexZero16)[i].load (order);
668+ case sizeof (uint32_t ):
669+ return (&IndexZero32)[i].load (order);
670+ default :
671+ swift_runtime_unreachable (" unknown index size" );
672+ }
673+ }
674+
675+ void storeIndexAt (unsigned value, size_t i, std::memory_order order) {
676+ assert (i > 0 && " index zero is off-limits, used to store capacity" );
677+
678+ switch (indexSize ()) {
679+ case sizeof (uint8_t ):
680+ return (&IndexZero8)[i].store (value, order);
681+ case sizeof (uint16_t ):
682+ return (&IndexZero16)[i].store (value, order);
683+ case sizeof (uint32_t ):
684+ return (&IndexZero32)[i].store (value, order);
685+ default :
686+ swift_runtime_unreachable (" unknown index size" );
687+ }
688+ }
636689 };
637690
638691 // / A simple linked list representing pointers that need to be freed.
@@ -720,27 +773,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
720773 // / returning the new array with all existing indices copied into it. This
721774 // / operation performs a rehash, so that the indices are in the correct
722775 // / location in the new array.
723- IndexStorage *resize (IndexStorage *indices, Index indicesMask ,
776+ IndexStorage *resize (IndexStorage *indices, uint8_t indicesCapacityLog2 ,
724777 ElemTy *elements) {
725- // Mask is size - 1. Double the size. Start with 4 (fits into 16-byte malloc
726- // bucket).
727- size_t newCount = indices ? 2 * (indicesMask + 1 ) : 4 ;
728- size_t newMask = newCount - 1 ;
778+ // Double the size. Start with 16 (fits into 16-byte malloc
779+ // bucket), which is 2^4 .
780+ size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4 ;
781+ size_t newMask = ( 1UL << newCapacityLog2) - 1 ;
729782
730- IndexStorage *newIndices = IndexStorage::allocate (newCount );
783+ IndexStorage *newIndices = IndexStorage::allocate (newCapacityLog2 );
731784
732- for (size_t i = 1 ; i <= indicesMask; i++) {
733- Index index = indices->at (i).load (std::memory_order_relaxed);
785+ size_t indicesCount = 1UL << indicesCapacityLog2;
786+ for (size_t i = 1 ; i < indicesCount; i++) {
787+ unsigned index = indices->loadIndexAt (i, std::memory_order_relaxed);
734788 if (index == 0 )
735789 continue ;
736790
737791 auto *element = &elements[index - 1 ];
738792 auto hash = hash_value (*element);
739793
740794 size_t newI = hash & newMask;
741- while (newIndices->at (newI) != 0 )
795+ // Index 0 is unusable (occupied by the capacity), so always skip it.
796+ while (newI == 0 ||
797+ newIndices->loadIndexAt (newI, std::memory_order_relaxed) != 0 ) {
742798 newI = (newI + 1 ) & newMask;
743- newIndices->at (newI).store (index, std::memory_order_relaxed);
799+ }
800+ newIndices->storeIndexAt (index, newI, std::memory_order_relaxed);
744801 }
745802
746803 Indices.store (newIndices, std::memory_order_release);
@@ -752,32 +809,31 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
752809
753810 // / Search for the given key within the given indices and elements arrays. If
754811 // / an entry already exists for that key, return a pointer to the element. If
755- // / no entry exists, return a pointer to the location in the indices array
756- // / where the index of the new element would be stored.
812+ // / no entry exists, return the location in the indices array where the index
813+ // / of the new element would be stored.
757814 template <class KeyTy >
758- static std::pair<ElemTy *, std::atomic<Index> * >
815+ static std::pair<ElemTy *, unsigned >
759816 find (const KeyTy &key, IndexStorage *indices, size_t elementCount,
760817 ElemTy *elements) {
761818 if (!indices)
762- return {nullptr , nullptr };
819+ return {nullptr , 0 };
763820 auto hash = hash_value (key);
764- auto indicesMask = indices->Mask . load (std::memory_order_relaxed) ;
821+ auto indicesMask = ( 1UL << indices->CapacityLog2 ) - 1 ;
765822
766823 auto i = hash & indicesMask;
767824 while (true ) {
768825 // Index 0 is used for the mask and is not actually an index.
769826 if (i == 0 )
770827 i++;
771828
772- auto *indexPtr = &indices->at (i);
773- auto index = indexPtr->load (std::memory_order_acquire);
829+ auto index = indices->loadIndexAt (i, std::memory_order_acquire);
774830 // Element indices are 1-based, 0 means no entry.
775831 if (index == 0 )
776- return {nullptr , indexPtr };
832+ return {nullptr , i };
777833 if (index - 1 < elementCount) {
778834 auto *candidate = &elements[index - 1 ];
779835 if (candidate->matchesKey (key))
780- return {candidate, nullptr };
836+ return {candidate, 0 };
781837 }
782838
783839 i = (i + 1 ) & indicesMask;
@@ -895,7 +951,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
895951 if (!indices)
896952 indices = resize (indices, 0 , nullptr );
897953
898- auto indicesMask = indices->Mask . load (std::memory_order_relaxed) ;
954+ auto indicesCapacityLog2 = indices->CapacityLog2 ;
899955 auto elementCount = ElementCount.load (std::memory_order_relaxed);
900956 auto *elements = Elements.load (std::memory_order_relaxed);
901957
@@ -906,12 +962,14 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
906962 return ;
907963 }
908964
909- // The actual capacity is indicesMask + 1. The number of slots in use is
910- // elementCount + 1, since the mask also takes a slot.
911- auto emptyCount = (indicesMask + 1 ) - (elementCount + 1 );
912- auto proportion = (indicesMask + 1 ) / emptyCount;
965+ auto indicesCapacity = 1UL << indicesCapacityLog2;
966+
967+ // The number of slots in use is elementCount + 1, since the capacity also
968+ // takes a slot.
969+ auto emptyCount = indicesCapacity - (elementCount + 1 );
970+ auto proportion = indicesCapacity / emptyCount;
913971 if (proportion >= ResizeProportion) {
914- indices = resize (indices, indicesMask , elements);
972+ indices = resize (indices, indicesCapacityLog2 , elements);
915973 found = find (key, indices, elementCount, elements);
916974 assert (!found.first && " Shouldn't suddenly find the key after rehashing" );
917975 }
@@ -928,7 +986,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
928986 assert (hash_value (key) == hash_value (*element) &&
929987 " Element must have the same hash code as its key." );
930988 ElementCount.store (elementCount + 1 , std::memory_order_release);
931- found.second ->store (elementCount + 1 , std::memory_order_release);
989+ indices->storeIndexAt (elementCount + 1 , found.second ,
990+ std::memory_order_release);
932991 }
933992
934993 deallocateFreeListIfSafe ();