@@ -165,35 +165,56 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length,
165165 check_paths.insert (check_paths.end (), paths.begin (), paths.end ());
166166 THROW_IF_ERROR (vectorized::schema_util::check_variant_has_no_ambiguous_paths (check_paths));
167167 }
168- for (size_t i = 0 ; i < paths.size (); ++i) {
169- FieldInfo field_info;
170- schema_util::get_field_info (values[i], &field_info);
171- if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
172- continue ;
173- }
174- if (column_variant.get_subcolumn (paths[i], i) == nullptr ) {
175- if (paths[i].has_nested_part ()) {
176- column_variant.add_nested_subcolumn (paths[i], field_info, old_num_rows);
177- } else {
178- column_variant.add_sub_column (paths[i], old_num_rows);
168+ auto [doc_snapshot_data_paths, doc_snapshot_data_values] = column_variant.get_doc_snapshot_data_paths_and_values ();
169+ auto & doc_snapshot_data_offsets = column_variant.serialized_doc_snapshot_column_offsets ();
170+ if (config.parse_to_subcolumns ) {
171+ for (size_t i = 0 ; i < paths.size (); ++i) {
172+ FieldInfo field_info;
173+ schema_util::get_field_info (values[i], &field_info);
174+ if (field_info.scalar_type_id == PrimitiveType::INVALID_TYPE) {
175+ continue ;
176+ }
177+ if (column_variant.get_subcolumn (paths[i], i) == nullptr ) {
178+ if (paths[i].has_nested_part ()) {
179+ column_variant.add_nested_subcolumn (paths[i], field_info, old_num_rows);
180+ } else {
181+ column_variant.add_sub_column (paths[i], old_num_rows);
182+ }
183+ }
184+ auto * subcolumn = column_variant.get_subcolumn (paths[i], i);
185+ if (!subcolumn) {
186+ throw doris::Exception (ErrorCode::INVALID_ARGUMENT, " Failed to find sub column {}" ,
187+ paths[i].get_path ());
188+ }
189+ if (subcolumn->cur_num_of_defaults () > 0 ) {
190+ subcolumn->insert_many_defaults (subcolumn->cur_num_of_defaults ());
191+ subcolumn->reset_current_num_of_defaults ();
192+ }
193+ if (subcolumn->size () != old_num_rows) {
194+ throw doris::Exception (ErrorCode::INVALID_ARGUMENT,
195+ " subcolumn {} size missmatched, may contains duplicated entry" ,
196+ paths[i].get_path ());
197+ }
198+ subcolumn->insert (std::move (values[i]), std::move (field_info));
199+ if (!paths[i].empty () && config.parse_to_doc_snapshot ) {
200+ subcolumn->serialize_to_sparse_column (doc_snapshot_data_paths, paths[i].get_path (), doc_snapshot_data_values, old_num_rows);
179201 }
180202 }
181- auto * subcolumn = column_variant.get_subcolumn (paths[i], i);
182- if (!subcolumn) {
183- throw doris::Exception (ErrorCode::INVALID_ARGUMENT, " Failed to find sub column {}" ,
184- paths[i].get_path ());
185- }
186- if (subcolumn->cur_num_of_defaults () > 0 ) {
187- subcolumn->insert_many_defaults (subcolumn->cur_num_of_defaults ());
188- subcolumn->reset_current_num_of_defaults ();
189- }
190- if (subcolumn->size () != old_num_rows) {
191- throw doris::Exception (ErrorCode::INVALID_ARGUMENT,
192- " subcolumn {} size missmatched, may contains duplicated entry" ,
193- paths[i].get_path ());
203+ } else {
204+ CHECK (config.parse_to_doc_snapshot );
205+ for (size_t i = 0 ; i < paths.size (); ++i) {
206+ FieldInfo field_info;
207+ schema_util::get_field_info (values[i], &field_info);
208+ if (paths[i].empty ()) {
209+ column_variant.get_subcolumn (paths[i])->insert (std::move (values[i]), std::move (field_info));
210+ continue ;
211+ }
212+ ColumnVariant::Subcolumn tmp_subcolumn (0 , true );
213+ tmp_subcolumn.insert (std::move (values[i]), std::move (field_info));
214+ tmp_subcolumn.serialize_to_sparse_column (doc_snapshot_data_paths, paths[i].get_path (), doc_snapshot_data_values, 0 );
194215 }
195- subcolumn->insert (std::move (values[i]), std::move (field_info));
196216 }
217+ doc_snapshot_data_offsets.push_back (doc_snapshot_data_paths->size ());
197218 // /// Insert default values to missed subcolumns.
198219 const auto & subcolumns = column_variant.get_subcolumns ();
199220 for (const auto & entry : subcolumns) {
@@ -215,7 +236,6 @@ void parse_json_to_variant(IColumn& column, const char* src, size_t length,
215236 auto sparse_column = column_variant.get_sparse_column ();
216237 if (sparse_column->size () == old_num_rows) {
217238 sparse_column->assume_mutable ()->insert_default ();
218- column_variant.get_doc_snapshot_column ()->assume_mutable ()->insert_default ();
219239 }
220240#ifndef NDEBUG
221241 column_variant.check_consistency ();
@@ -240,7 +260,7 @@ void parse_json_to_variant(IColumn& column, const ColumnString& raw_json_column,
240260
241261
242262// parse the doc snapshot column to subcolumns
243- void parse_binary_to_variant (ColumnVariant& column_variant, const ParseConfig& config ) {
263+ void parse_binary_to_variant (ColumnVariant& column_variant) {
244264 std::unordered_map<std::string_view, vectorized::ColumnVariant::Subcolumn> subcolumns;
245265
246266 auto [column_key, column_value] = column_variant.get_doc_snapshot_data_paths_and_values ();
0 commit comments