1010#include < string_view>
1111#include < type_traits>
1212
13+ #include < cassert>
14+
1315namespace {
1416using namespace clickhouse ;
1517
@@ -105,13 +107,13 @@ inline void AppendToDictionary(Column& dictionary, const ItemView & item) {
105107 }
106108}
107109
108- // Add special NULL-item, which is expected at pos(0) in dictionary,
110+ // A special NULL-item, which is expected at pos(0) in dictionary,
109111// note that we distinguish empty string from NULL-value.
110- inline void AppendNullItemToDictionary ( ColumnRef dictionary) {
112+ inline auto GetNullItemForDictionary ( const ColumnRef dictionary) {
111113 if (auto n = dictionary->As <ColumnNullable>()) {
112- AppendToDictionary (*dictionary, ItemView{}) ;
114+ return ItemView{};
113115 } else {
114- AppendToDictionary (*dictionary, ItemView{dictionary->Type ()->GetCode (), std::string_view{}}) ;
116+ return ItemView{dictionary->Type ()->GetCode (), std::string_view{}};
115117 }
116118}
117119
@@ -120,23 +122,21 @@ inline void AppendNullItemToDictionary(ColumnRef dictionary) {
120122namespace clickhouse {
121123ColumnLowCardinality::ColumnLowCardinality (ColumnRef dictionary_column)
122124 : Column(Type::CreateLowCardinality(dictionary_column->Type ())),
123- dictionary_column_(dictionary_column),
125+ dictionary_column_(dictionary_column-> Slice ( 0 , 0 )), // safe way to get an column of the same type.
124126 index_column_(std::make_shared<ColumnUInt32>())
125127{
126- if (dictionary_column_->Size () != 0 ) {
127- // When dictionary column was constructed with values, re-add values by copying to update index and unique_items_map.
128-
129- // Steal values into temporary column.
130- auto values = dictionary_column_->Slice (0 , 0 );
131- values->Swap (*dictionary_column_);
132-
133- AppendNullItemToDictionary (dictionary_column_);
134-
135- // Re-add values, updating index and unique_items_map.
136- for (size_t i = 0 ; i < values->Size (); ++i)
137- AppendUnsafe (values->GetItem (i));
128+ if (dictionary_column->Size () != 0 ) {
129+ AppendNullItemToEmptyColumn ();
130+
131+ // Add values, updating index_column_ and unique_items_map_.
132+ for (size_t i = 0 ; i < dictionary_column->Size (); ++i) {
133+ // TODO: it would be possible to eliminate copying
134+ // by adding InsertUnsafe(pos, ItemView) method to a Column,
135+ // but that is too much work for now.
136+ AppendUnsafe (dictionary_column->GetItem (i));
137+ }
138138 } else {
139- AppendNullItemToDictionary (dictionary_column_ );
139+ AppendNullItemToEmptyColumn ( );
140140 }
141141}
142142
@@ -288,6 +288,9 @@ void ColumnLowCardinality::Save(CodedOutputStream* output) {
288288void ColumnLowCardinality::Clear () {
289289 index_column_->Clear ();
290290 dictionary_column_->Clear ();
291+ unique_items_map_.clear ();
292+
293+ AppendNullItemToEmptyColumn ();
291294}
292295
293296size_t ColumnLowCardinality::Size () const {
@@ -298,8 +301,7 @@ ColumnRef ColumnLowCardinality::Slice(size_t begin, size_t len) {
298301 begin = std::min (begin, Size ());
299302 len = std::min (len, Size () - begin);
300303
301- ColumnRef new_dictionary = dictionary_column_->Slice (0 , 0 );
302- auto result = std::make_shared<ColumnLowCardinality>(new_dictionary);
304+ auto result = std::make_shared<ColumnLowCardinality>(dictionary_column_->Slice (0 , 0 ));
303305
304306 for (size_t i = begin; i < begin + len; ++i)
305307 result->AppendUnsafe (this ->GetItem (i));
@@ -353,6 +355,19 @@ void ColumnLowCardinality::AppendUnsafe(const ItemView & value) {
353355 }
354356}
355357
358+ void ColumnLowCardinality::AppendNullItemToEmptyColumn ()
359+ {
360+ // INVARIANT: Empty LC column has an (invisible) null-item at pos 0, which MUST be present in
361+ // unique_items_map_ in order to reuse dictionary posistion on subsequent Append()-s.
362+
363+ // Should be only performed on empty LC column.
364+ assert (dictionary_column_->Size () == 0 && unique_items_map_.empty ());
365+
366+ const auto null_item = GetNullItemForDictionary (dictionary_column_);
367+ AppendToDictionary (*dictionary_column_, null_item);
368+ unique_items_map_.emplace (computeHashKey (null_item), dictionary_column_->Size ());
369+ }
370+
356371size_t ColumnLowCardinality::GetDictionarySize () const {
357372 return dictionary_column_->Size ();
358373}
0 commit comments