1
- extern crate quick_xml;
2
- extern crate yaml_rust;
3
-
4
1
use std:: io:: { Read , Write , stdout} ;
5
2
use std:: fs:: { File , OpenOptions } ;
6
3
use std:: path:: Path ;
@@ -11,6 +8,7 @@ use quick_xml::Reader;
11
8
use quick_xml:: events:: Event ;
12
9
use yaml_rust:: YamlLoader ;
13
10
use yaml_rust:: yaml:: Yaml ;
11
+ use regex:: Regex ;
14
12
15
13
struct Table < ' a > {
16
14
path : String ,
@@ -37,13 +35,19 @@ impl<'a> Table<'a> {
37
35
fn write ( & self , text : & str ) {
38
36
self . file . borrow_mut ( ) . write_all ( & text. as_bytes ( ) ) . expect ( "Write error encountered; exiting..." ) ;
39
37
}
38
+ fn clear_columns ( & self ) {
39
+ for col in & self . columns {
40
+ col. value . borrow_mut ( ) . clear ( ) ;
41
+ }
42
+ }
40
43
}
41
44
42
45
struct Column < ' a > {
43
46
name : String ,
44
47
path : String ,
45
48
value : RefCell < String > ,
46
49
attr : Option < & ' a str > ,
50
+ filter : Option < Regex > ,
47
51
convert : Option < & ' a str > ,
48
52
find : Option < & ' a str > ,
49
53
replace : Option < & ' a str > ,
@@ -117,12 +121,32 @@ fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, colspec:
117
121
Some ( add_table ( & path, Some ( & file) , filemode, col[ "cols" ] . as_vec ( ) . expect ( "Subtable 'cols' entry is not an array" ) ) )
118
122
}
119
123
} ;
124
+ let filter: Option < Regex > = match col[ "filt" ] . as_str ( ) {
125
+ Some ( str) => Some ( Regex :: new ( & str) . expect ( "Invalid regex in 'filt' entry in configuration file" ) ) ,
126
+ None => None
127
+ } ;
120
128
let attr = col[ "attr" ] . as_str ( ) ;
121
129
let convert = col[ "conv" ] . as_str ( ) ;
122
130
let find = col[ "find" ] . as_str ( ) ;
123
131
let replace = col[ "repl" ] . as_str ( ) ;
124
132
let consol = col[ "cons" ] . as_str ( ) ;
125
- let column = Column { name : name. to_string ( ) , path, value : RefCell :: new ( String :: new ( ) ) , attr, convert, find, replace, consol, subtable } ;
133
+
134
+ if convert. is_some ( ) && !vec ! ( "xml-to-text" , "gml-to-ewkb" ) . contains ( & convert. unwrap ( ) ) {
135
+ panic ! ( "Option 'convert' contains invalid value {}" , convert. unwrap( ) ) ;
136
+ }
137
+ if filter. is_some ( ) {
138
+ if convert. is_some ( ) {
139
+ panic ! ( "Option 'filt' and 'conv' cannot be used together on a single column" ) ;
140
+ }
141
+ if find. is_some ( ) {
142
+ eprintln ! ( "Notice: when using a filter and find/replace on a single column, the filter is applied before replacements" ) ;
143
+ }
144
+ if consol. is_some ( ) {
145
+ eprintln ! ( "Notice: when using a filter and consolidation on a single column, the filter is applied to each phase of consolidation separately" ) ;
146
+ }
147
+ }
148
+
149
+ let column = Column { name : name. to_string ( ) , path, value : RefCell :: new ( String :: new ( ) ) , attr, filter, convert, find, replace, consol, subtable } ;
126
150
table. columns . push ( column) ;
127
151
}
128
152
table
@@ -148,7 +172,8 @@ fn main() -> std::io::Result<()> {
148
172
149
173
let mut path = String :: new ( ) ;
150
174
let mut buf = Vec :: new ( ) ;
151
- let mut count = 0 ;
175
+ let mut fullcount = 0 ;
176
+ let mut filtercount = 0 ;
152
177
153
178
let rowpath = config[ "path" ] . as_str ( ) . expect ( "No valid 'path' entry in configuration file" ) ;
154
179
let colspec = config[ "cols" ] . as_vec ( ) . expect ( "No valid 'cols' array in configuration file" ) ;
@@ -161,17 +186,20 @@ fn main() -> std::io::Result<()> {
161
186
let mut tables: Vec < & Table > = Vec :: new ( ) ;
162
187
let mut table = & maintable;
163
188
189
+ let mut filtered = false ;
164
190
let mut xmltotext = false ;
165
191
let mut text = String :: new ( ) ;
166
192
let mut gmltoewkb = false ;
167
193
let mut gmlpos = false ;
194
+ let mut gmlint = false ;
168
195
let mut gmlgeom = Geometry :: new ( ) ;
169
196
let start = Instant :: now ( ) ;
170
197
loop {
171
198
match reader. read_event ( & mut buf) {
172
199
Ok ( Event :: Start ( ref e) ) => {
173
200
path. push ( '/' ) ;
174
201
path. push_str ( reader. decode ( e. name ( ) ) . unwrap ( ) ) ;
202
+ if filtered { continue ; }
175
203
if xmltotext {
176
204
text. push_str ( & format ! ( "<{}>" , & e. unescape_and_decode( & reader) . unwrap( ) ) ) ;
177
205
continue ;
@@ -211,8 +239,13 @@ fn main() -> std::io::Result<()> {
211
239
"gml:Point" => gmlgeom. gtype = 1 ,
212
240
"gml:LineString" => gmlgeom. gtype = 2 ,
213
241
"gml:Polygon" => gmlgeom. gtype = 3 ,
242
+ "gml:MultiPolygon" => ( ) ,
243
+ "gml:polygonMember" => ( ) ,
214
244
"gml:exterior" => ( ) ,
215
- "gml:interior" => ( ) ,
245
+ "gml:interior" => {
246
+ eprintln ! ( "GML polygon interior ring not yet supported; ignored" ) ;
247
+ gmlint = true ;
248
+ } ,
216
249
"gml:LinearRing" => gmlgeom. rings . push ( Vec :: new ( ) ) ,
217
250
"gml:posList" => gmlpos = true ,
218
251
_ => eprintln ! ( "GML type {} not supported" , tag)
@@ -221,7 +254,7 @@ fn main() -> std::io::Result<()> {
221
254
continue ;
222
255
}
223
256
else if path == table. path {
224
- count += 1 ;
257
+ fullcount += 1 ;
225
258
}
226
259
else if path. len ( ) > table. path . len ( ) {
227
260
for i in 0 ..table. columns . len ( ) {
@@ -254,6 +287,12 @@ fn main() -> std::io::Result<()> {
254
287
if table. columns [ i] . value . borrow ( ) . is_empty ( ) {
255
288
eprintln ! ( "Column {} requested attribute {} not found" , table. columns[ i] . name, request) ;
256
289
}
290
+ if let Some ( re) = & table. columns [ i] . filter {
291
+ if !re. is_match ( & table. columns [ i] . value . borrow ( ) ) {
292
+ filtered = true ;
293
+ table. clear_columns ( ) ;
294
+ }
295
+ }
257
296
}
258
297
259
298
// Set the appropriate convert flag for the following data in case the 'conv' option is present
@@ -269,12 +308,13 @@ fn main() -> std::io::Result<()> {
269
308
}
270
309
} ,
271
310
Ok ( Event :: Text ( ref e) ) => {
311
+ if filtered { continue ; }
272
312
if xmltotext {
273
313
text. push_str ( & e. unescape_and_decode ( & reader) . unwrap ( ) ) ;
274
314
continue ;
275
315
}
276
316
else if gmltoewkb {
277
- if gmlpos {
317
+ if gmlpos && !gmlint {
278
318
let value = String :: from ( & e. unescape_and_decode ( & reader) . unwrap ( ) ) ;
279
319
for pos in value. split ( ' ' ) {
280
320
gmlgeom. rings . last_mut ( ) . unwrap ( ) . push ( pos. parse ( ) . unwrap ( ) ) ;
@@ -301,35 +341,47 @@ fn main() -> std::io::Result<()> {
301
341
}
302
342
}
303
343
table. columns [ i] . value . borrow_mut ( ) . push_str ( & e. unescape_and_decode ( & reader) . unwrap ( ) . replace ( "\\ " , "\\ \\ " ) ) ;
344
+ if let Some ( re) = & table. columns [ i] . filter {
345
+ if !re. is_match ( & table. columns [ i] . value . borrow ( ) ) {
346
+ filtered = true ;
347
+ table. clear_columns ( ) ;
348
+ }
349
+ }
304
350
break ;
305
351
}
306
352
}
307
353
} ,
308
354
Ok ( Event :: End ( _) ) => {
309
355
if path == table. path {
310
-
311
- // End tag of a subtable; write the first column value of the parent table as the first column of the subtable
312
- if !tables. is_empty ( ) {
313
- table. write ( & tables. last ( ) . unwrap ( ) . columns [ 0 ] . value . borrow ( ) ) ;
314
- table. write ( "\t " ) ;
356
+ if filtered {
357
+ filtered = false ;
358
+ filtercount += 1 ;
315
359
}
360
+ else {
316
361
317
- // Now write out the other column values
318
- for i in 0 ..table. columns . len ( ) {
319
- if table. columns [ i] . subtable . is_some ( ) { continue ; }
320
- if i > 0 { table. write ( "\t " ) ; }
321
- if table. columns [ i] . value . borrow ( ) . is_empty ( ) { table. write ( "\\ N" ) ; }
322
- else {
323
- if let ( Some ( s) , Some ( r) ) = ( table. columns [ i] . find , table. columns [ i] . replace ) {
324
- let mut value = table. columns [ i] . value . borrow_mut ( ) ;
325
- * value = value. replace ( s, r) ;
362
+ // End tag of a subtable; write the first column value of the parent table as the first column of the subtable
363
+ if !tables. is_empty ( ) {
364
+ table. write ( & tables. last ( ) . unwrap ( ) . columns [ 0 ] . value . borrow ( ) ) ;
365
+ table. write ( "\t " ) ;
366
+ }
367
+
368
+ // Now write out the other column values
369
+ for i in 0 ..table. columns . len ( ) {
370
+ if table. columns [ i] . subtable . is_some ( ) { continue ; }
371
+ if i > 0 { table. write ( "\t " ) ; }
372
+ if table. columns [ i] . value . borrow ( ) . is_empty ( ) { table. write ( "\\ N" ) ; }
373
+ else {
374
+ if let ( Some ( s) , Some ( r) ) = ( table. columns [ i] . find , table. columns [ i] . replace ) {
375
+ let mut value = table. columns [ i] . value . borrow_mut ( ) ;
376
+ * value = value. replace ( s, r) ;
377
+ }
378
+ table. write ( & table. columns [ i] . value . borrow ( ) ) ;
379
+ table. columns [ i] . value . borrow_mut ( ) . clear ( ) ;
326
380
}
327
- table. write ( & table. columns [ i] . value . borrow ( ) ) ;
328
- table. columns [ i] . value . borrow_mut ( ) . clear ( ) ;
329
381
}
382
+ table. write ( "\n " ) ;
383
+ if !tables. is_empty ( ) { table = tables. pop ( ) . unwrap ( ) ; }
330
384
}
331
- table. write ( "\n " ) ;
332
- if !tables. is_empty ( ) { table = tables. pop ( ) . unwrap ( ) ; }
333
385
}
334
386
let i = path. rfind ( '/' ) . unwrap ( ) ;
335
387
let tag = path. split_off ( i) ;
@@ -348,6 +400,8 @@ fn main() -> std::io::Result<()> {
348
400
}
349
401
}
350
402
else if gmltoewkb {
403
+ if gmlpos && ( tag == "gml:posList" ) { gmlpos = false ; }
404
+ if gmlint && ( tag == "gml:interior" ) { gmlint = false ; }
351
405
for i in 0 ..table. columns . len ( ) {
352
406
if path == table. columns [ i] . path {
353
407
gmltoewkb = false ;
@@ -364,6 +418,6 @@ fn main() -> std::io::Result<()> {
364
418
}
365
419
buf. clear ( ) ;
366
420
}
367
- eprintln ! ( "{} rows processed in {} seconds" , count , start. elapsed( ) . as_secs( ) ) ;
421
+ eprintln ! ( "{} rows processed in {} seconds{} " , fullcount-filtercount , start. elapsed( ) . as_secs( ) , match filtercount { 0 => "" . to_owned ( ) , n => format! ( " ({} filtered)" , n ) } ) ;
368
422
Ok ( ( ) )
369
423
}
0 commit comments