1010#include < util/system/unaligned_mem.h>
1111#include < util/memory/pool.h>
1212
13+ #include < bit>
1314#include < deque>
1415#include < type_traits>
1516
1617namespace NKikimr {
1718
18- #pragma pack(push,4)
19+ static_assert (std::endian::native == std::endian::little, " TCell expects little endian architecture for data packing" );
20+
1921// Represents one element in a tuple
2022// Doesn't own the memory buffer that stores the actual value
21- // Small values (<= 8 bytes) are stored inline
23+ // Small values (<= 14 bytes) are stored inline
2224struct TCell {
2325 template <typename T>
2426 using TStdLayout = std::enable_if_t <std::is_standard_layout<T>::value, T>;
2527
28+ public:
29+ // 14 bytes ensures parity with TUnboxedValuePod
30+ static constexpr size_t MaxInlineSize () { return 14 ; }
31+ static constexpr bool CanInline (size_t size) { return size <= MaxInlineSize (); }
32+
2633private:
27- ui32 DataSize_ : 30 ;
28- ui32 IsInline_ : 1 ;
29- ui32 IsNull_ : 1 ;
30- union {
31- i64 IntVal;
34+ // NotInline (low bit):
35+ // 0: IsInline() returns true
36+ // 1: IsInline() returns false
37+ // NotRefValue (high bit):
38+ // 0: data is TRefValue
39+ // 1: data is TInlineValue
40+ // Null values are IsInline() with a nullptr data pointer and zero size
41+ // A TCell filled with zeroes is always a null value
42+ // This bit arrangement allows methods to check a single flag
43+ enum EKind : ui8 {
44+ // Both bits 0: null value
45+ KindNull = 0 ,
46+ // Low bit 1: ref value
47+ KindRefValue = 1 ,
48+ // High bit 1: inline value
49+ KindInlineValue = 2 ,
50+ };
51+
52+ struct TRefValue {
3253 const char * Ptr;
33- double DoubleVal;
34- float FloatVal;
35- char Bytes[8 ];
54+ ui64 Size : 62 ;
55+ ui64 Kind : 2 ;
56+ };
57+
58+ struct TInlineValue {
59+ char Data[15 ];
60+ ui8 Size : 6 ;
61+ ui8 Kind : 2 ;
62+ };
63+
64+ private:
65+ union {
66+ char Raw[16 ];
67+ TRefValue Ref;
68+ TInlineValue Inline;
3669 };
3770
71+ constexpr ui8 GetKind () const noexcept {
72+ return ui8 (Raw[15 ]) >> 6 ;
73+ }
74+
75+ constexpr bool IsRefValue () const noexcept {
76+ return (GetKind () & 2 ) == 0 ;
77+ }
78+
79+ constexpr bool HasInlineFlag () const noexcept {
80+ return (GetKind () & 1 ) == 0 ;
81+ }
82+
3883public:
3984 TCell ()
4085 : TCell(nullptr , 0 )
4186 {}
4287
4388 TCell (TArrayRef<const char > ref)
44- : TCell(ref.begin(), ui32(ref.size()))
45- {
46- Y_ABORT_UNLESS (ref.size () < Max<ui32>(), " Too large blob size for TCell" );
47- }
48-
49- TCell (const char * ptr, ui32 size)
50- : DataSize_(size)
51- , IsInline_(0 )
52- , IsNull_(ptr == nullptr )
53- , Ptr(ptr)
54- {
55- Y_DEBUG_ABORT_UNLESS (ptr || size == 0 );
56-
57- if (CanInline (size)) {
58- IsInline_ = 1 ;
59- IntVal = 0 ;
89+ : TCell(ref.data(), ref.size())
90+ {}
6091
92+ TCell (const char * ptr, size_t size) {
93+ if (!ptr) {
94+ Y_DEBUG_ABORT_UNLESS (size == 0 );
95+ // All zeroes represents the null value
96+ ::memset (Raw, 0 , 16 );
97+ } else if (CanInline (size)) {
98+ // We use switch with constant size memcpy for better codegen
6199 switch (size) {
62- case 8 : memcpy (&IntVal, ptr, 8 ); break ;
63- case 7 : memcpy (&IntVal, ptr, 7 ); break ;
64- case 6 : memcpy (&IntVal, ptr, 6 ); break ;
65- case 5 : memcpy (&IntVal, ptr, 5 ); break ;
66- case 4 : memcpy (&IntVal, ptr, 4 ); break ;
67- case 3 : memcpy (&IntVal, ptr, 3 ); break ;
68- case 2 : memcpy (&IntVal, ptr, 2 ); break ;
69- case 1 : memcpy (&IntVal, ptr, 1 ); break ;
100+ case 15 : ::memcpy (Inline.Data , ptr, 15 ); break ;
101+ case 14 : ::memcpy (Inline.Data , ptr, 14 ); break ;
102+ case 13 : ::memcpy (Inline.Data , ptr, 13 ); break ;
103+ case 12 : ::memcpy (Inline.Data , ptr, 12 ); break ;
104+ case 11 : ::memcpy (Inline.Data , ptr, 11 ); break ;
105+ case 10 : ::memcpy (Inline.Data , ptr, 10 ); break ;
106+ case 9 : ::memcpy (Inline.Data , ptr, 9 ); break ;
107+ case 8 : ::memcpy (Inline.Data , ptr, 8 ); break ;
108+ case 7 : ::memcpy (Inline.Data , ptr, 7 ); break ;
109+ case 6 : ::memcpy (Inline.Data , ptr, 6 ); break ;
110+ case 5 : ::memcpy (Inline.Data , ptr, 5 ); break ;
111+ case 4 : ::memcpy (Inline.Data , ptr, 4 ); break ;
112+ case 3 : ::memcpy (Inline.Data , ptr, 3 ); break ;
113+ case 2 : ::memcpy (Inline.Data , ptr, 2 ); break ;
114+ case 1 : ::memcpy (Inline.Data , ptr, 1 ); break ;
115+ case 0 : break ;
116+ default : Y_ABORT (" unreachable" );
70117 }
118+ Inline.Size = size;
119+ Inline.Kind = KindInlineValue;
120+ } else {
121+ Y_DEBUG_ABORT_UNLESS (size <= Max<ui32>());
122+ Ref.Ptr = ptr;
123+ Ref.Size = size;
124+ Ref.Kind = KindRefValue;
71125 }
72126 }
73127
74128 explicit TCell (const TRawTypeValue* v)
75129 : TCell((const char *)v->Data(), v->Size())
76130 {}
77131
78- explicit operator bool () const
79- {
132+ constexpr explicit operator bool () const {
80133 return !IsNull ();
81134 }
82135
83- bool IsInline () const { return IsInline_; }
84- bool IsNull () const { return IsNull_; }
85- ui32 Size () const { return DataSize_; }
136+ constexpr bool IsNull () const {
137+ return GetKind () == KindNull;
138+ }
139+
140+ constexpr bool IsInline () const {
141+ return HasInlineFlag ();
142+ }
143+
144+ constexpr const char * InlineData () const {
145+ Y_DEBUG_ABORT_UNLESS (IsInline ());
146+ return Raw;
147+ }
148+
149+ constexpr const char * Data () const {
150+ return IsRefValue () ? Ref.Ptr : Inline.Data ;
151+ }
152+
153+ constexpr ui32 Size () const {
154+ return IsRefValue () ? Ref.Size : Inline.Size ;
155+ }
86156
87157 TArrayRef<const char > AsRef () const noexcept
88158 {
@@ -104,7 +174,7 @@ struct TCell {
104174
105175 template <typename T, typename = TStdLayout<T>>
106176 bool ToValue (T& value, TString& err) const noexcept {
107- if (sizeof (T) != Size ()) {
177+ if (Y_UNLIKELY ( sizeof (T) != Size () )) {
108178 err = Sprintf (" ToValue<T>() type size %" PRISZT " doesn't match TCell size %" PRIu32, sizeof (T), Size ());
109179 return false ;
110180 }
@@ -127,46 +197,40 @@ struct TCell {
127197 static inline TCell Make (const T& val) noexcept {
128198 auto *ptr = static_cast <const char *>(static_cast <const void *>(&val));
129199
130- return TCell{ ptr, sizeof (val) };
131- }
132-
133- #if 1
134- // Optimization to store small values (<= 8 bytes) inplace
135- static constexpr bool CanInline (ui32 sz) { return sz <= 8 ; }
136- static constexpr size_t MaxInlineSize () { return 8 ; }
137- const char * InlineData () const { Y_DEBUG_ABORT_UNLESS (IsInline_); return IsNull_ ? nullptr : (char *)&IntVal; }
138- const char * Data () const { return IsNull_ ? nullptr : (IsInline_ ? (char *)&IntVal : Ptr); }
139- #else
140- // Non-inlinable version for perf comparisons
141- static bool CanInline(ui32) { return false; }
142- const char* InlineData() const { Y_DEBUG_ABORT_UNLESS(!IsInline_); return Ptr; }
143- const char* Data() const { Y_DEBUG_ABORT_UNLESS(!IsInline_); return Ptr; }
144- #endif
145-
146- void CopyDataInto (char * dst) const {
147- if (IsInline_) {
148- switch (DataSize_) {
149- case 8 : memcpy (dst, &IntVal, 8 ); break ;
150- case 7 : memcpy (dst, &IntVal, 7 ); break ;
151- case 6 : memcpy (dst, &IntVal, 6 ); break ;
152- case 5 : memcpy (dst, &IntVal, 5 ); break ;
153- case 4 : memcpy (dst, &IntVal, 4 ); break ;
154- case 3 : memcpy (dst, &IntVal, 3 ); break ;
155- case 2 : memcpy (dst, &IntVal, 2 ); break ;
156- case 1 : memcpy (dst, &IntVal, 1 ); break ;
157- }
158- return ;
159- }
200+ return TCell (ptr, sizeof (val));
201+ }
160202
161- if (Ptr) {
162- memcpy (dst, Ptr, DataSize_);
203+ void CopyDataInto (char * dst) const {
204+ if (IsRefValue ()) {
205+ if (Ref.Size > 0 ) {
206+ ::memcpy (dst, Ref.Ptr, Ref.Size);
207+ }
208+ } else {
209+ // We use switch with constant size memcpy for better codegen
210+ switch (Inline.Size ) {
211+ case 15 : ::memcpy (dst, Inline.Data , 15 ); break ;
212+ case 14 : ::memcpy (dst, Inline.Data , 14 ); break ;
213+ case 13 : ::memcpy (dst, Inline.Data , 13 ); break ;
214+ case 12 : ::memcpy (dst, Inline.Data , 12 ); break ;
215+ case 11 : ::memcpy (dst, Inline.Data , 11 ); break ;
216+ case 10 : ::memcpy (dst, Inline.Data , 10 ); break ;
217+ case 9 : ::memcpy (dst, Inline.Data , 9 ); break ;
218+ case 8 : ::memcpy (dst, Inline.Data , 8 ); break ;
219+ case 7 : ::memcpy (dst, Inline.Data , 7 ); break ;
220+ case 6 : ::memcpy (dst, Inline.Data , 6 ); break ;
221+ case 5 : ::memcpy (dst, Inline.Data , 5 ); break ;
222+ case 4 : ::memcpy (dst, Inline.Data , 4 ); break ;
223+ case 3 : ::memcpy (dst, Inline.Data , 3 ); break ;
224+ case 2 : ::memcpy (dst, Inline.Data , 2 ); break ;
225+ case 1 : ::memcpy (dst, Inline.Data , 1 ); break ;
226+ case 0 : break ;
227+ default : Y_ABORT (" unreachable" );
228+ }
163229 }
164230 }
165231};
166232
167- #pragma pack(pop)
168-
169- static_assert (sizeof (TCell) == 12 , " TCell must be 12 bytes" );
233+ static_assert (sizeof (TCell) == 16 , " TCell must be 16 bytes" );
170234using TCellsRef = TConstArrayRef<const TCell>;
171235
172236inline size_t EstimateSize (TCellsRef cells) {
@@ -196,8 +260,8 @@ struct TCellVectorsEquals {
196260};
197261
198262inline int CompareCellsAsByteString (const TCell& a, const TCell& b, bool isDescending) {
199- const char * pa = ( const char *) a.Data ();
200- const char * pb = ( const char *) b.Data ();
263+ const char * pa = a.Data ();
264+ const char * pb = b.Data ();
201265 size_t sza = a.Size ();
202266 size_t szb = b.Size ();
203267 int cmp = memcmp (pa, pb, sza < szb ? sza : szb);
@@ -227,6 +291,14 @@ inline int CompareTypedCells(const TCell& a, const TCell& b, const NScheme::TTyp
227291 return va == vb ? 0 : ((va < vb) != type.IsDescending () ? -1 : 1 ); \
228292 }
229293
294+ #define LARGER_TYPE_SWITCH (typeEnum, castType ) \
295+ case NKikimr::NScheme::NTypeIds::typeEnum: \
296+ { \
297+ castType va = ReadUnaligned<castType>((const castType*)a.Data ()); \
298+ castType vb = ReadUnaligned<castType>((const castType*)b.Data ()); \
299+ return va == vb ? 0 : ((va < vb) != type.IsDescending () ? -1 : 1 ); \
300+ }
301+
230302 SIMPLE_TYPE_SWITCH (Int8, i8 );
231303 SIMPLE_TYPE_SWITCH (Int16, i16 );
232304 SIMPLE_TYPE_SWITCH (Uint16, ui16);
@@ -238,7 +310,7 @@ inline int CompareTypedCells(const TCell& a, const TCell& b, const NScheme::TTyp
238310 SIMPLE_TYPE_SWITCH (Bool, ui8);
239311 SIMPLE_TYPE_SWITCH (Double, double );
240312 SIMPLE_TYPE_SWITCH (Float, float );
241- SIMPLE_TYPE_SWITCH (PairUi64Ui64, TPair);
313+ LARGER_TYPE_SWITCH (PairUi64Ui64, TPair);
242314 SIMPLE_TYPE_SWITCH (Date, ui16);
243315 SIMPLE_TYPE_SWITCH (Datetime, ui32);
244316 SIMPLE_TYPE_SWITCH (Timestamp, ui64);
@@ -249,6 +321,7 @@ inline int CompareTypedCells(const TCell& a, const TCell& b, const NScheme::TTyp
249321 SIMPLE_TYPE_SWITCH (Interval64, i64 );
250322
251323#undef SIMPLE_TYPE_SWITCH
324+ #undef LARGER_TYPE_SWITCH
252325
253326 case NKikimr::NScheme::NTypeIds::String:
254327 case NKikimr::NScheme::NTypeIds::String4k:
0 commit comments