66#include < IO/ReadHelpers.h>
77#include < base/types.h>
88#include < Common/Exception.h>
9- #include " Functions/geometryConverters.h"
10- #include " IO/ReadBufferFromString.h"
119
1210#include < DataTypes/DataTypeArray.h>
1311#include < DataTypes/DataTypeTuple.h>
@@ -95,79 +93,71 @@ std::unordered_map<String, GeoColumnMetadata> parseGeoMetadataEncoding(std::opti
9593 return geo_columns;
9694}
9795
98- CartesianPoint readPointWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
96+ inline ArrowPoint readPointWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
9997{
10098 double x;
10199 double y;
102100 readBinaryEndian (x, in_buffer, endian_to_read);
103101 readBinaryEndian (y, in_buffer, endian_to_read);
104- return CartesianPoint (x, y) ;
102+ return ArrowPoint{. x = x, . y = y} ;
105103}
106104
107- CartesianLineString readLineWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
105+ inline ArrowLineString readLineWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
108106{
109107 int num_points;
110108 readBinaryEndian (num_points, in_buffer, endian_to_read);
111109
112- CartesianLineString line;
110+ ArrowLineString line;
113111 for (int i = 0 ; i < num_points; ++i)
114112 {
115113 line.push_back (readPointWKB (in_buffer, endian_to_read));
116114 }
117115 return line;
118116}
119117
120- CartesianPolygon readPolygonWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
118+ inline ArrowPolygon readPolygonWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
121119{
122120 int num_lines;
123121 readBinaryEndian (num_lines, in_buffer, endian_to_read);
124122
125- CartesianPolygon polygon;
126- {
127- auto parsed_points = readLineWKB (in_buffer, endian_to_read);
128- for (const auto & point : parsed_points)
129- polygon.outer ().push_back (point);
130- }
131-
132- for (int i = 1 ; i < num_lines; ++i)
123+ ArrowPolygon polygon;
124+ for (int i = 0 ; i < num_lines; ++i)
133125 {
134126 auto parsed_points = readLineWKB (in_buffer, endian_to_read);
135- polygon.inners ().push_back ({});
136- for (const auto & point : parsed_points)
137- polygon.inners ().back ().push_back (point);
127+ polygon.push_back (std::move (parsed_points));
138128 }
139129 return polygon;
140130}
141131
142- GeometricObject parseWKBFormat (ReadBuffer & in_buffer);
132+ ArrowGeometricObject parseWKBFormat (ReadBuffer & in_buffer);
143133
144- CartesianMultiLineString readMultiLineStringWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
134+ ArrowMultiLineString readMultiLineStringWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
145135{
146- CartesianMultiLineString multiline;
136+ ArrowMultiLineString multiline;
147137
148138 int num_lines;
149139 readBinaryEndian (num_lines, in_buffer, endian_to_read);
150140
151141 for (int i = 0 ; i < num_lines; ++i)
152- multiline.push_back (std::get<CartesianLineString >(parseWKBFormat (in_buffer)));
142+ multiline.push_back (std::get<ArrowLineString >(parseWKBFormat (in_buffer)));
153143
154144 return multiline;
155145}
156146
157- CartesianMultiPolygon readMultiPolygonWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
147+ ArrowMultiPolygon readMultiPolygonWKB (ReadBuffer & in_buffer, std::endian endian_to_read)
158148{
159- CartesianMultiPolygon multipolygon;
149+ ArrowMultiPolygon multipolygon;
160150
161151 int num_polygons;
162152 readBinaryEndian (num_polygons, in_buffer, endian_to_read);
163153
164154 for (int i = 0 ; i < num_polygons; ++i)
165- multipolygon.push_back (std::get<CartesianPolygon >(parseWKBFormat (in_buffer)));
155+ multipolygon.push_back (std::get<ArrowPolygon >(parseWKBFormat (in_buffer)));
166156
167157 return multipolygon;
168158}
169159
170- GeometricObject parseWKBFormat (ReadBuffer & in_buffer)
160+ ArrowGeometricObject parseWKBFormat (ReadBuffer & in_buffer)
171161{
172162 char little_endian;
173163 if (!in_buffer.read (little_endian))
@@ -195,34 +185,119 @@ GeometricObject parseWKBFormat(ReadBuffer & in_buffer)
195185 }
196186}
197187
198- GeometricObject parseWKBFormat (const String & input)
188+ inline ArrowPoint parseWKTPoint (ReadBuffer & in_buffer)
189+ {
190+ double x;
191+ double y;
192+ char ch;
193+ while (true )
194+ {
195+ in_buffer.peek (ch);
196+ if (ch != ' ' )
197+ break ;
198+ in_buffer.ignore ();
199+ }
200+ tryReadFloatText (x, in_buffer);
201+ in_buffer.ignore ();
202+ readFloatText (y, in_buffer);
203+ return {x, y};
204+ }
205+
206+ inline void readOpenBracket (ReadBuffer & in_buffer)
207+ {
208+ while (true )
209+ {
210+ char ch;
211+ readBinary (ch, in_buffer);
212+ if (ch == ' (' )
213+ break ;
214+ }
215+ }
216+
217+ inline bool readNextItem (ReadBuffer & in_buffer)
218+ {
219+ char ch;
220+ while (true )
221+ {
222+ readBinary (ch, in_buffer);
223+ if (ch == ' )' )
224+ return true ;
225+
226+ if (ch == ' ,' )
227+ return false ;
228+ }
229+ }
230+
231+ inline ArrowLineString parseWKTLine (ReadBuffer & in_buffer)
199232{
200- auto in_buffer = ReadBufferFromString (input);
233+ ArrowLineString ls;
234+ readOpenBracket (in_buffer);
235+ while (true )
236+ {
237+ ls.push_back (parseWKTPoint (in_buffer));
238+ if (readNextItem (in_buffer))
239+ break ;
240+ }
241+ return ls;
242+ }
201243
202- return parseWKBFormat (in_buffer);
244+ inline ArrowPolygon parseWKTPolygon (ReadBuffer & in_buffer)
245+ {
246+ ArrowPolygon poly;
247+ readOpenBracket (in_buffer);
248+ while (true )
249+ {
250+ poly.push_back (parseWKTLine (in_buffer));
251+ if (readNextItem (in_buffer))
252+ break ;
253+ }
254+ return poly;
203255}
204256
205- GeometricObject parseWKTFormat ( const String & input )
257+ inline ArrowMultiPolygon parseWKTMultiPolygon (ReadBuffer & in_buffer )
206258{
207- if (input.starts_with (" POINT" ))
259+ ArrowMultiPolygon poly;
260+ readOpenBracket (in_buffer);
261+ while (true )
208262 {
209- CartesianPoint point ;
210- boost::geometry::read_wkt (input, point);
211- return point ;
263+ poly. push_back ( parseWKTPolygon (in_buffer)) ;
264+ if ( readNextItem (in_buffer))
265+ break ;
212266 }
213- if (input.starts_with (" POLYGON" ))
267+ return poly;
268+ }
269+
270+ ArrowGeometricObject parseWKTFormat (ReadBuffer & in_buffer)
271+ {
272+ std::string type;
273+ while (true )
214274 {
215- CartesianPolygon polygon;
216- boost::geometry::read_wkt (input, polygon);
217- return polygon;
275+ char current_symbol;
276+ in_buffer.peek (current_symbol);
277+ if (current_symbol == ' (' )
278+ break ;
279+ type.push_back (current_symbol);
280+ in_buffer.ignore ();
218281 }
219- if (input.starts_with (" LINESTRING" ))
282+
283+ while (type.back () == ' ' )
284+ type.pop_back ();
285+
286+ if (type == " POINT" )
220287 {
221- CartesianLineString linestring;
222- boost::geometry::read_wkt (input, linestring);
223- return linestring;
288+ readOpenBracket (in_buffer);
289+ return parseWKTPoint (in_buffer);
224290 }
225- throw Exception (ErrorCodes::BAD_ARGUMENTS, " Unknown geometry object in WKT {}" , input);
291+ if (type == " LINESTRING" )
292+ return parseWKTLine (in_buffer);
293+ if (type == " POLYGON" )
294+ return parseWKTPolygon (in_buffer);
295+ if (type == " MULTILINESTRING" )
296+ return parseWKTPolygon (in_buffer);
297+ if (type == " MULTIPOLYGON" )
298+ return parseWKTMultiPolygon (in_buffer);
299+
300+ throw Exception (ErrorCodes::BAD_ARGUMENTS, " Error while reading WKT format: type {}" , type);
226301}
227302
228303PointColumnBuilder::PointColumnBuilder (const String & name_)
@@ -234,14 +309,14 @@ PointColumnBuilder::PointColumnBuilder(const String & name_)
234309{
235310}
236311
237- void PointColumnBuilder::appendObject (const GeometricObject & object)
312+ void PointColumnBuilder::appendObject (const ArrowGeometricObject & object)
238313{
239- if (!std::holds_alternative<CartesianPoint >(object))
314+ if (!std::holds_alternative<ArrowPoint >(object))
240315 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Types in parquet mismatched - expected point" );
241316
242- const auto & point = std::get<CartesianPoint >(object);
243- point_column_data_x.push_back (point.x () );
244- point_column_data_y.push_back (point.y () );
317+ const auto & point = std::get<ArrowPoint >(object);
318+ point_column_data_x.push_back (point.x );
319+ point_column_data_y.push_back (point.y );
245320}
246321
247322ColumnWithTypeAndName PointColumnBuilder::getResultColumn ()
@@ -265,12 +340,12 @@ LineColumnBuilder::LineColumnBuilder(const String & name_)
265340{
266341}
267342
268- void LineColumnBuilder::appendObject (const GeometricObject & object)
343+ void LineColumnBuilder::appendObject (const ArrowGeometricObject & object)
269344{
270- if (!std::holds_alternative<CartesianLineString >(object))
345+ if (!std::holds_alternative<ArrowLineString >(object))
271346 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Types in parquet mismatched - expected line string" );
272347
273- const auto & line = std::get<CartesianLineString >(object);
348+ const auto & line = std::get<ArrowLineString >(object);
274349 for (const auto & point : line)
275350 {
276351 point_column_builder.appendObject (point);
@@ -296,16 +371,15 @@ PolygonColumnBuilder::PolygonColumnBuilder(const String & name_)
296371{
297372}
298373
299- void PolygonColumnBuilder::appendObject (const GeometricObject & object)
374+ void PolygonColumnBuilder::appendObject (const ArrowGeometricObject & object)
300375{
301- if (!std::holds_alternative<CartesianPolygon >(object))
376+ if (!std::holds_alternative<ArrowPolygon >(object))
302377 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Types in parquet mismatched - expected polygon" );
303378
304- const auto & polygon = std::get<CartesianPolygon>(object);
305- line_column_builder.appendObject (CartesianLineString (polygon.outer ().begin (), polygon.outer ().end ()));
306- for (const auto & inner_circle : polygon.inners ())
307- line_column_builder.appendObject (CartesianLineString (inner_circle.begin (), inner_circle.end ()));
308- offset += 1 + polygon.inners ().size ();
379+ const auto & polygon = std::get<ArrowPolygon>(object);
380+ for (const auto & inner_circle : polygon)
381+ line_column_builder.appendObject (inner_circle);
382+ offset += polygon.size ();
309383 offsets.push_back (offset);
310384}
311385
@@ -326,12 +400,12 @@ MultiLineStringColumnBuilder::MultiLineStringColumnBuilder(const String & name_)
326400{
327401}
328402
329- void MultiLineStringColumnBuilder::appendObject (const GeometricObject & object)
403+ void MultiLineStringColumnBuilder::appendObject (const ArrowGeometricObject & object)
330404{
331- if (!std::holds_alternative<CartesianMultiLineString >(object))
405+ if (!std::holds_alternative<ArrowMultiLineString >(object))
332406 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Types in parquet mismatched - expected multiline" );
333407
334- const auto & multilinestring = std::get<CartesianMultiLineString >(object);
408+ const auto & multilinestring = std::get<ArrowMultiLineString >(object);
335409 for (const auto & line : multilinestring)
336410 line_column_builder.appendObject (line);
337411
@@ -356,12 +430,12 @@ MultiPolygonColumnBuilder::MultiPolygonColumnBuilder(const String & name_)
356430{
357431}
358432
359- void MultiPolygonColumnBuilder::appendObject (const GeometricObject & object)
433+ void MultiPolygonColumnBuilder::appendObject (const ArrowGeometricObject & object)
360434{
361- if (!std::holds_alternative<CartesianMultiPolygon >(object))
435+ if (!std::holds_alternative<ArrowMultiPolygon >(object))
362436 throw Exception (ErrorCodes::BAD_ARGUMENTS, " Types in parquet mismatched - expected multi polygon" );
363437
364- const auto & multipolygon = std::get<CartesianMultiPolygon >(object);
438+ const auto & multipolygon = std::get<ArrowMultiPolygon >(object);
365439 for (const auto & polygon : multipolygon)
366440 polygon_column_builder.appendObject (polygon);
367441
@@ -401,7 +475,7 @@ GeoColumnBuilder::GeoColumnBuilder(const String & name_, GeoType type_)
401475 }
402476}
403477
404- void GeoColumnBuilder::appendObject (const GeometricObject & object)
478+ void GeoColumnBuilder::appendObject (const ArrowGeometricObject & object)
405479{
406480 geomery_column_builder->appendObject (object);
407481}
0 commit comments