55#include < Columns/ColumnsNumber.h>
66
77#include < Functions/keyvaluepair/impl/StateHandler.h>
8- #include < Functions/keyvaluepair/impl/KeyValuePairExtractor.h>
8+ #include < Functions/keyvaluepair/impl/StateHandlerImpl.h>
9+ #include < absl/container/flat_hash_map.h>
910
1011namespace DB
1112{
@@ -16,37 +17,36 @@ namespace ErrorCodes
1617 extern const int LIMIT_EXCEEDED;
1718}
1819
20+ namespace extractKV
21+ {
1922/*
2023 * Handle state transitions and a few states like `FLUSH_PAIR` and `END`.
2124 * */
2225template <typename StateHandler>
23- class CHKeyValuePairExtractor : public KeyValuePairExtractor
26+ class KeyValuePairExtractor
2427{
2528 using State = typename DB::extractKV::StateHandler::State;
2629 using NextState = DB::extractKV::StateHandler::NextState;
2730
2831public:
29- explicit CHKeyValuePairExtractor (StateHandler state_handler_, uint64_t max_number_of_pairs_)
30- : state_handler(std::move(state_handler_)), max_number_of_pairs(max_number_of_pairs_)
31- {}
32+ using PairWriter = typename StateHandler::PairWriter;
3233
33- uint64_t extract (const std::string & data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
34+ KeyValuePairExtractor (const Configuration & configuration_, uint64_t max_number_of_pairs_)
35+ : state_handler(StateHandler(configuration_))
36+ , max_number_of_pairs(max_number_of_pairs_)
3437 {
35- return extract (std::string_view {data}, keys, values);
3638 }
3739
38- uint64_t extract (std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values) override
40+ protected:
41+ uint64_t extractImpl (std::string_view data, typename StateHandler::PairWriter & pair_writer)
3942 {
4043 auto state = State::WAITING_KEY;
4144
42- auto key = typename StateHandler::StringWriter (*keys);
43- auto value = typename StateHandler::StringWriter (*values);
44-
4545 uint64_t row_offset = 0 ;
4646
4747 while (state != State::END)
4848 {
49- auto next_state = processState (data, state, key, value , row_offset);
49+ auto next_state = processState (data, state, pair_writer , row_offset);
5050
5151 if (next_state.position_in_string > data.size () && next_state.state != State::END)
5252 {
@@ -61,14 +61,13 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
6161 }
6262
6363 // below reset discards invalid keys and values
64- reset (key, value );
64+ reset (pair_writer );
6565
6666 return row_offset;
6767 }
6868
6969private:
70-
71- NextState processState (std::string_view file, State state, auto & key, auto & value, uint64_t & row_offset)
70+ NextState processState (std::string_view file, State state, auto & pair_writer, uint64_t & row_offset)
7271 {
7372 switch (state)
7473 {
@@ -78,11 +77,11 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
7877 }
7978 case State::READING_KEY:
8079 {
81- return state_handler.readKey (file, key );
80+ return state_handler.readKey (file, pair_writer );
8281 }
8382 case State::READING_QUOTED_KEY:
8483 {
85- return state_handler.readQuotedKey (file, key );
84+ return state_handler.readQuotedKey (file, pair_writer );
8685 }
8786 case State::READING_KV_DELIMITER:
8887 {
@@ -94,15 +93,15 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
9493 }
9594 case State::READING_VALUE:
9695 {
97- return state_handler.readValue (file, value );
96+ return state_handler.readValue (file, pair_writer );
9897 }
9998 case State::READING_QUOTED_VALUE:
10099 {
101- return state_handler.readQuotedValue (file, value );
100+ return state_handler.readQuotedValue (file, pair_writer );
102101 }
103102 case State::FLUSH_PAIR:
104103 {
105- return flushPair (file, key, value , row_offset);
104+ return flushPair (file, pair_writer , row_offset);
106105 }
107106 case State::END:
108107 {
@@ -111,8 +110,7 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
111110 }
112111 }
113112
114- NextState flushPair (const std::string_view & file, auto & key,
115- auto & value, uint64_t & row_offset)
113+ NextState flushPair (const std::string_view & file, auto & pair_writer, uint64_t & row_offset)
116114 {
117115 row_offset++;
118116
@@ -121,20 +119,61 @@ class CHKeyValuePairExtractor : public KeyValuePairExtractor
121119 throw Exception (ErrorCodes::LIMIT_EXCEEDED, " Number of pairs produced exceeded the limit of {}" , max_number_of_pairs);
122120 }
123121
124- key. commit ();
125- value. commit ();
122+ pair_writer. commitKey ();
123+ pair_writer. commitValue ();
126124
127125 return {0 , file.empty () ? State::END : State::WAITING_KEY};
128126 }
129127
130- void reset (auto & key, auto & value )
128+ void reset (auto & pair_writer )
131129 {
132- key. reset ();
133- value. reset ();
130+ pair_writer. resetKey ();
131+ pair_writer. resetValue ();
134132 }
135133
136134 StateHandler state_handler;
137135 uint64_t max_number_of_pairs;
138136};
139137
140138}
139+
140+ struct KeyValuePairExtractorNoEscaping : extractKV::KeyValuePairExtractor<extractKV::NoEscapingStateHandler>
141+ {
142+ using StateHandler = extractKV::NoEscapingStateHandler;
143+ explicit KeyValuePairExtractorNoEscaping (const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
144+ : KeyValuePairExtractor(configuration_, max_number_of_pairs_) {}
145+
146+ uint64_t extract (std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values)
147+ {
148+ auto pair_writer = typename StateHandler::PairWriter (*keys, *values);
149+ return extractImpl (data, pair_writer);
150+ }
151+ };
152+
153+ struct KeyValuePairExtractorInlineEscaping : extractKV::KeyValuePairExtractor<extractKV::InlineEscapingStateHandler>
154+ {
155+ using StateHandler = extractKV::InlineEscapingStateHandler;
156+ explicit KeyValuePairExtractorInlineEscaping (const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
157+ : KeyValuePairExtractor(configuration_, max_number_of_pairs_) {}
158+
159+ uint64_t extract (std::string_view data, ColumnString::MutablePtr & keys, ColumnString::MutablePtr & values)
160+ {
161+ auto pair_writer = typename StateHandler::PairWriter (*keys, *values);
162+ return extractImpl (data, pair_writer);
163+ }
164+ };
165+
166+ struct KeyValuePairExtractorReferenceMap : extractKV::KeyValuePairExtractor<extractKV::ReferencesMapStateHandler>
167+ {
168+ using StateHandler = extractKV::ReferencesMapStateHandler;
169+ explicit KeyValuePairExtractorReferenceMap (const extractKV::Configuration & configuration_, std::size_t max_number_of_pairs_)
170+ : KeyValuePairExtractor(configuration_, max_number_of_pairs_) {}
171+
172+ uint64_t extract (std::string_view data, absl::flat_hash_map<std::string_view, std::string_view> & map)
173+ {
174+ auto pair_writer = typename StateHandler::PairWriter (map);
175+ return extractImpl (data, pair_writer);
176+ }
177+ };
178+
179+ }
0 commit comments