@@ -620,25 +620,76 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// is stored inline. We work around this contradiction by considering the
   /// first index to always be occupied with a value that never matches any key.
   struct IndexStorage {
+    using RawType = uintptr_t;
+
+    RawType Value;
+
+    static constexpr uintptr_t log2(uintptr_t x) {
+      return x <= 1 ? 0 : log2(x >> 1) + 1;
+    }
+
+    static constexpr uintptr_t InlineIndexBits = 4;
+    static constexpr uintptr_t InlineIndexMask = 0xF;
+    static constexpr uintptr_t InlineCapacity =
+        sizeof(RawType) * CHAR_BIT / InlineIndexBits;
+    static constexpr uintptr_t InlineCapacityLog2 = log2(InlineCapacity);
+
+    // Indices can be stored in different ways, depending on how big they need
+    // to be. The index mode is stored in the bottom two bits of Value. The
+    // meaning of the rest of Value depends on the mode.
+    enum class IndexMode {
+      // Value is treated as an array of four-bit integers, storing the indices.
+      // The first element overlaps with the mode, and is never used.
+      Inline,
+
+      // The rest of Value holds a pointer to storage. The first byte of this
+      // storage holds the log2 of the storage capacity. The storage is treated
+      // as an array of 8, 16, or 32-bit integers. The first element overlaps
+      // with the capacity, and is never used.
+      Array8,
+      Array16,
+      Array32,
+    };
+
+    IndexStorage() : Value(0) {}
+    IndexStorage(RawType value) : Value(value) {}
+    IndexStorage(void *ptr, unsigned indexSize, uint8_t capacityLog2) {
+      assert(capacityLog2 > InlineCapacityLog2);
+      IndexMode mode;
+      switch (indexSize) {
+      case sizeof(uint8_t):
+        mode = IndexMode::Array8;
+        break;
+      case sizeof(uint16_t):
+        mode = IndexMode::Array16;
+        break;
+      case sizeof(uint32_t):
+        mode = IndexMode::Array32;
+        break;
+      default:
+        swift_unreachable("unknown index size");
+      }
+      Value = reinterpret_cast<uintptr_t>(ptr) | static_cast<uintptr_t>(mode);
+      *reinterpret_cast<uint8_t *>(ptr) = capacityLog2;
+    }
+
+    bool valueIsPointer() { return Value & 3; }
+
+    void *pointer() {
+      if (valueIsPointer())
+        return (void *)(Value & (RawType)~3);
+      return nullptr;
+    }
+
+    IndexMode indexMode() { return IndexMode(Value & 3); }
+
     // Index size is variable based on capacity, either 8, 16, or 32 bits.
     //
     // This is somewhat conservative. We could have, for example, a capacity of
     // 512 but a maximum index of only 200, which would still allow for 8-bit
     // indices. However, taking advantage of this would require reallocating
     // the index storage when the element count crossed a threshold, which is
     // more complex, and the advantages are minimal. This keeps it simple.
-    //
-    // The first byte of the storage is the log 2 of the capacity. The remaining
-    // storage is then an array of 8, 16, or 32 bit integers, depending on the
-    // capacity number. This union allows us to access the capacity, and then
-    // access the rest of the storage by taking the address of one of the
-    // IndexZero members and indexing into it (always avoiding index 0).
-    union {
-      uint8_t CapacityLog2;
-      std::atomic<uint8_t> IndexZero8;
-      std::atomic<uint16_t> IndexZero16;
-      std::atomic<uint32_t> IndexZero32;
-    };

     // Get the size, in bytes, of the index needed for the given capacity.
     static unsigned indexSize(uint8_t capacityLog2) {
@@ -649,46 +700,66 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
       return sizeof(uint32_t);
     }

-    unsigned indexSize() { return indexSize(CapacityLog2); }
+    uint8_t getCapacityLog2() {
+      if (auto *ptr = pointer())
+        return *reinterpret_cast<uint8_t *>(ptr);
+      return InlineCapacityLog2;
+    }

-    static IndexStorage *allocate(size_t capacityLog2) {
+    static IndexStorage allocate(size_t capacityLog2) {
       assert(capacityLog2 > 0);
       size_t capacity = 1UL << capacityLog2;
-      auto *ptr = reinterpret_cast<IndexStorage *>(
-          calloc(capacity, indexSize(capacityLog2)));
+      unsigned size = indexSize(capacityLog2);
+      auto *ptr = calloc(capacity, size);
       if (!ptr)
         swift::crash("Could not allocate memory.");
-      ptr->CapacityLog2 = capacityLog2;
-      return ptr;
+      return IndexStorage(ptr, size, capacityLog2);
     }

     unsigned loadIndexAt(size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].load(order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].load(order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].load(order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline:
+        return (Value >> (i * InlineIndexBits)) & InlineIndexMask;
+      case IndexMode::Array8:
+        return ((std::atomic<uint8_t> *)pointer())[i].load(order);
+      case IndexMode::Array16:
+        return ((std::atomic<uint16_t> *)pointer())[i].load(order);
+      case IndexMode::Array32:
+        return ((std::atomic<uint32_t> *)pointer())[i].load(order);
       }
     }

-    void storeIndexAt(unsigned value, size_t i, std::memory_order order) {
+    void storeIndexAt(std::atomic<RawType> *inlineStorage, unsigned value,
+                      size_t i, std::memory_order order) {
       assert(i > 0 && "index zero is off-limits, used to store capacity");
-
-      switch (indexSize()) {
-      case sizeof(uint8_t):
-        return (&IndexZero8)[i].store(value, order);
-      case sizeof(uint16_t):
-        return (&IndexZero16)[i].store(value, order);
-      case sizeof(uint32_t):
-        return (&IndexZero32)[i].store(value, order);
-      default:
-        swift_unreachable("unknown index size");
+      assert(i < (1 << getCapacityLog2()) &&
+             "index is off the end of the indices");
+
+      switch (indexMode()) {
+      case IndexMode::Inline: {
+        assert(value == (value & InlineIndexMask) && "value is too big to fit");
+        auto shift = i * InlineIndexBits;
+        assert((Value & (InlineIndexMask << shift)) == 0 &&
+               "can't overwrite an existing index");
+        assert(Value == inlineStorage->load(std::memory_order_relaxed) &&
+               "writing with a stale IndexStorage");
+        auto newStorage = Value | ((RawType)value << shift);
+        inlineStorage->store(newStorage, order);
+        break;
+      }
+      case IndexMode::Array8:
+        ((std::atomic<uint8_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array16:
+        ((std::atomic<uint16_t> *)pointer())[i].store(value, order);
+        break;
+      case IndexMode::Array32:
+        ((std::atomic<uint32_t> *)pointer())[i].store(value, order);
+        break;
       }
     }
   };
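
The struct above is easier to follow with its encoding spelled out in isolation: the low two bits of Value select the mode, Inline mode packs one four-bit index per slot (slot 0 overlaps the tag and is never used), and the array modes treat the remaining bits as a pointer whose first byte records the log2 of the capacity. Here is a minimal standalone sketch of the inline half; the names are invented for illustration and are not part of the patch.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Mode tag lives in bits 0-1; Inline mode is tag 0. Each index occupies
    // four bits, and slot 0 is never used because it overlaps the tag.
    static unsigned inlineIndexAt(uintptr_t word, size_t slot) {
      assert(slot > 0 && (word & 3) == 0 && "Inline mode only");
      return (word >> (slot * 4)) & 0xF;
    }

    static uintptr_t withInlineIndex(uintptr_t word, size_t slot, unsigned value) {
      assert(value <= 0xF && inlineIndexAt(word, slot) == 0);
      return word | ((uintptr_t)value << (slot * 4));
    }

    int main() {
      uintptr_t word = 0;                 // empty table, Inline mode
      word = withInlineIndex(word, 3, 9); // store index 9 in slot 3
      assert(inlineIndexAt(word, 3) == 9);
      return 0;
    }

The payoff is that, on a 64-bit target, one word holds sixteen four-bit slots; with slot 0 reserved, up to fifteen small indices fit before any out-of-line allocation is needed.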
@@ -726,7 +797,11 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   std::atomic<ElemTy *> Elements{nullptr};

   /// The array of indices.
-  std::atomic<IndexStorage *> Indices{nullptr};
+  ///
+  /// This has to be stored as an IndexStorage::RawType instead of an IndexStorage
+  /// because some of our targets don't support interesting structs as atomic
+  /// types. See also MetadataCache::TrackingInfo, which uses the same technique.
+  std::atomic<typename IndexStorage::RawType> Indices{0};

   /// The writer lock, which must be taken before any mutation of the table.
   StaticMutex WriterLock;
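
The new doc comment explains the field's type; the pattern it implies on the reader side is a single atomic load of the raw word, with the IndexStorage wrapper rebuilt by value around it. A rough sketch of that pattern, using a stand-in struct rather than the real one:

    #include <atomic>
    #include <cstdint>

    struct IndexWord {                  // stand-in for IndexStorage
      uintptr_t Value;
    };

    std::atomic<uintptr_t> Indices{0};  // the published raw word

    IndexWord snapshotIndices() {
      // One atomic load of a plain integer; no std::atomic<struct> is needed,
      // which keeps the operation lock-free on targets where a struct atomic
      // would fall back to a lock.
      return IndexWord{Indices.load(std::memory_order_acquire)};
    }

    int main() { return snapshotIndices().Value == 0 ? 0 : 1; }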
@@ -778,18 +853,17 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// returning the new array with all existing indices copied into it. This
   /// operation performs a rehash, so that the indices are in the correct
   /// location in the new array.
-  IndexStorage *resize(IndexStorage *indices, uint8_t indicesCapacityLog2,
-                       ElemTy *elements) {
-    // Double the size. Start with 16 (fits into 16-byte malloc
-    // bucket), which is 2^4.
-    size_t newCapacityLog2 = indices ? indicesCapacityLog2 + 1 : 4;
+  IndexStorage resize(IndexStorage indices, uint8_t indicesCapacityLog2,
+                      ElemTy *elements) {
+    // Double the size.
+    size_t newCapacityLog2 = indicesCapacityLog2 + 1;
     size_t newMask = (1UL << newCapacityLog2) - 1;

-    IndexStorage *newIndices = IndexStorage::allocate(newCapacityLog2);
+    IndexStorage newIndices = IndexStorage::allocate(newCapacityLog2);

     size_t indicesCount = 1UL << indicesCapacityLog2;
     for (size_t i = 1; i < indicesCount; i++) {
-      unsigned index = indices->loadIndexAt(i, std::memory_order_relaxed);
+      unsigned index = indices.loadIndexAt(i, std::memory_order_relaxed);
       if (index == 0)
         continue;

@@ -799,15 +873,16 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
       size_t newI = hash & newMask;
       // Index 0 is unusable (occupied by the capacity), so always skip it.
       while (newI == 0 ||
-             newIndices->loadIndexAt(newI, std::memory_order_relaxed) != 0) {
+             newIndices.loadIndexAt(newI, std::memory_order_relaxed) != 0) {
         newI = (newI + 1) & newMask;
       }
-      newIndices->storeIndexAt(index, newI, std::memory_order_relaxed);
+      newIndices.storeIndexAt(nullptr, index, newI, std::memory_order_relaxed);
     }

-    Indices.store(newIndices, std::memory_order_release);
+    Indices.store(newIndices.Value, std::memory_order_release);

-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);

     return newIndices;
   }
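
The rehash loop in resize is the usual open-addressing pattern: the capacity is a power of two, probing is linear, and slot 0 is always skipped because it is reserved for the mode/capacity. Pulled out as a standalone sketch (the container and function name here are illustrative, not from the patch):

    #include <cstddef>
    #include <vector>

    // Find the slot where an index should land: start at hash & mask, then
    // walk forward, wrapping around, until an empty (zero) slot other than
    // slot 0 is found. The table size must be a power of two.
    static size_t probeForEmptySlot(const std::vector<unsigned> &table,
                                    size_t hash) {
      size_t mask = table.size() - 1;
      size_t i = hash & mask;
      while (i == 0 || table[i] != 0)
        i = (i + 1) & mask;
      return i;
    }

    int main() {
      std::vector<unsigned> table(8, 0);      // capacity 8, all slots empty
      table[probeForEmptySlot(table, 0)] = 1; // hash 0 skips slot 0, lands in 1
      return table[1] == 1 ? 0 : 1;
    }

Two details follow from the diff itself: the relaxed stores are fine because the new table is only published afterwards by the release store of newIndices.Value into Indices, and passing nullptr for inlineStorage is safe because a freshly allocated table is always in one of the array modes, so the Inline branch of storeIndexAt is never taken during a resize.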
@@ -818,20 +893,18 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// of the new element would be stored.
   template <class KeyTy>
   static std::pair<ElemTy *, unsigned>
-  find(const KeyTy &key, IndexStorage *indices, size_t elementCount,
+  find(const KeyTy &key, IndexStorage indices, size_t elementCount,
        ElemTy *elements) {
-    if (!indices)
-      return {nullptr, 0};
     auto hash = hash_value(key);
-    auto indicesMask = (1UL << indices->CapacityLog2) - 1;
+    auto indicesMask = (1UL << indices.getCapacityLog2()) - 1;

     auto i = hash & indicesMask;
     while (true) {
       // Index 0 is used for the mask and is not actually an index.
       if (i == 0)
         i++;

-      auto index = indices->loadIndexAt(i, std::memory_order_acquire);
+      auto index = indices.loadIndexAt(i, std::memory_order_acquire);
       // Element indices are 1-based, 0 means no entry.
       if (index == 0)
         return {nullptr, i};
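
One detail worth calling out in find: the old "if (!indices) return {nullptr, 0};" guard disappears because an empty map is now represented by a zero word, which is a valid Inline-mode table whose slots all read as empty. A tiny sketch of that observation, with plain integers standing in for the real types:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
      uintptr_t value = 0; // default IndexStorage: Inline mode, all slots empty
      size_t slot = 5;     // any slot > 0
      assert((value & 3) == 0);                   // mode bits say Inline
      assert(((value >> (slot * 4)) & 0xF) == 0); // slot reads as "no entry"
      // So the probe hits an empty slot immediately and reports "not found"
      // without needing a separate null check.
      return 0;
    }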
@@ -864,12 +937,12 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   /// Readers take a snapshot of the hash map, then work with the snapshot.
   class Snapshot {
     ConcurrentReadableHashMap *Map;
-    IndexStorage *Indices;
+    IndexStorage Indices;
     ElemTy *Elements;
     size_t ElementCount;

   public:
-    Snapshot(ConcurrentReadableHashMap *map, IndexStorage *indices,
+    Snapshot(ConcurrentReadableHashMap *map, IndexStorage indices,
              ElemTy *elements, size_t elementCount)
         : Map(map), Indices(indices), Elements(elements),
           ElementCount(elementCount) {}
@@ -885,7 +958,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
     /// Search for an element matching the given key. Returns a pointer to the
     /// found element, or nullptr if no matching element exists.
     template <class KeyTy> const ElemTy *find(const KeyTy &key) {
-      if (!Indices || !ElementCount || !Elements)
+      if (!Indices.Value || !ElementCount || !Elements)
        return nullptr;
       return ConcurrentReadableHashMap::find(key, Indices, ElementCount,
                                              Elements)
@@ -917,7 +990,7 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
     // pointer can just mean a concurrent insert that triggered a resize of the
     // elements array. This is harmless aside from a small performance hit, and
     // should not happen often.
-    IndexStorage *indices;
+    IndexStorage indices;
     size_t elementCount;
     ElemTy *elements;
     ElemTy *elements2;
@@ -951,11 +1024,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   void getOrInsert(KeyTy key, const Call &call) {
     StaticScopedLock guard(WriterLock);

-    auto *indices = Indices.load(std::memory_order_relaxed);
-    if (!indices)
-      indices = resize(indices, 0, nullptr);
-
-    auto indicesCapacityLog2 = indices->CapacityLog2;
+    auto indices = IndexStorage{Indices.load(std::memory_order_relaxed)};
+    auto indicesCapacityLog2 = indices.getCapacityLog2();
     auto elementCount = ElementCount.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);

@@ -990,8 +1060,8 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
       assert(hash_value(key) == hash_value(*element) &&
              "Element must have the same hash code as its key.");
       ElementCount.store(elementCount + 1, std::memory_order_release);
-      indices->storeIndexAt(elementCount + 1, found.second,
-                            std::memory_order_release);
+      indices.storeIndexAt(&Indices, elementCount + 1, found.second,
+                           std::memory_order_release);
     }

     deallocateFreeListIfSafe();
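
The extra &Indices argument to storeIndexAt exists because, in Inline mode, the indices live in the very word that readers load: updating the writer's local copy would be invisible to them, so the Inline branch rebuilds the word and publishes it with a single atomic store. A condensed sketch of that branch, assuming the writer lock is held and with illustrative names:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    // Writer-side publish for the inline case: merge the new four-bit index
    // into the current word, then store the result with release ordering so a
    // reader that sees the new index also sees the element it refers to.
    static void storeInlineIndex(std::atomic<uintptr_t> &published, size_t slot,
                                 unsigned value) {
      uintptr_t current = published.load(std::memory_order_relaxed);
      uintptr_t updated = current | ((uintptr_t)value << (slot * 4));
      published.store(updated, std::memory_order_release);
    }

    int main() {
      std::atomic<uintptr_t> indices{0};
      storeInlineIndex(indices, 2, 7);
      return ((indices.load() >> 8) & 0xF) == 7 ? 0 : 1;
    }

The array modes do not need the extra argument at all; they write straight through the out-of-line pointer.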
@@ -1002,17 +1072,18 @@ template <class ElemTy> struct ConcurrentReadableHashMap {
   void clear() {
     StaticScopedLock guard(WriterLock);

-    auto *indices = Indices.load(std::memory_order_relaxed);
+    IndexStorage indices = Indices.load(std::memory_order_relaxed);
     auto *elements = Elements.load(std::memory_order_relaxed);

     // Order doesn't matter here, snapshots will gracefully handle any field
     // being NULL/0 while the others are not.
-    Indices.store(nullptr, std::memory_order_relaxed);
+    Indices.store(0, std::memory_order_relaxed);
     ElementCount.store(0, std::memory_order_relaxed);
     Elements.store(nullptr, std::memory_order_relaxed);
     ElementCapacity = 0;

-    FreeListNode::add(&FreeList, indices);
+    if (auto *ptr = indices.pointer())
+      FreeListNode::add(&FreeList, ptr);
     FreeListNode::add(&FreeList, elements);

     deallocateFreeListIfSafe();