Skip to content

Commit 5656182

Browse files
committed
Improve GML handling; add skip option
1 parent 6fcf907 commit 5656182

File tree

1 file changed

+57
-31
lines changed

1 file changed

+57
-31
lines changed

src/main.rs

Lines changed: 57 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,11 @@ use regex::Regex;
1313
struct Table<'a> {
1414
path: String,
1515
file: RefCell<Box<dyn Write>>,
16+
skip: String,
1617
columns: Vec<Column<'a>>
1718
}
1819
impl<'a> Table<'a> {
19-
fn new(path: &str, file: Option<&str>, filemode: &str) -> Table<'a> {
20+
fn new(path: &str, file: Option<&str>, filemode: &str, skip: Option<&'a str>) -> Table<'a> {
2021
Table {
2122
path: String::from(path),
2223
file: match file {
@@ -29,7 +30,8 @@ impl<'a> Table<'a> {
2930
}
3031
))
3132
},
32-
columns: Vec::new()
33+
columns: Vec::new(),
34+
skip: match skip { Some(s) => format!("{}{}", path, s), None => String::new() }
3335
}
3436
}
3537
fn write(&self, text: &str) {
@@ -79,10 +81,11 @@ impl Geometry {
7981
fn gml_to_ewkb(cell: &RefCell<String>, coll: &Vec<Geometry>) {
8082
let mut ewkb: Vec<u8> = vec![];
8183

82-
// if coll.len() > 1 {
83-
ewkb.extend_from_slice(&[1, 6, 0, 0, 0]);
84+
if coll.len() > 1 {
85+
let multitype = coll.first().unwrap().gtype+3;
86+
ewkb.extend_from_slice(&[1, multitype, 0, 0, 0]);
8487
ewkb.extend_from_slice(&(coll.len() as u32).to_le_bytes());
85-
// }
88+
}
8689

8790
for geom in coll {
8891
let code = match geom.dims {
@@ -95,9 +98,9 @@ fn gml_to_ewkb(cell: &RefCell<String>, coll: &Vec<Geometry>) {
9598
};
9699
ewkb.extend_from_slice(&[1, geom.gtype, 0, 0, code]);
97100
ewkb.extend_from_slice(&geom.srid.to_le_bytes());
98-
ewkb.extend_from_slice(&(geom.rings.len() as u32).to_le_bytes());
101+
if geom.gtype == 3 { ewkb.extend_from_slice(&(geom.rings.len() as u32).to_le_bytes()); } // Only polygons have multiple rings
99102
for ring in geom.rings.iter() {
100-
ewkb.extend_from_slice(&((ring.len() as u32)/geom.dims as u32).to_le_bytes());
103+
if geom.gtype != 1 { ewkb.extend_from_slice(&((ring.len() as u32)/geom.dims as u32).to_le_bytes()); } // Points don't have multiple vertices
101104
for pos in ring.iter() {
102105
ewkb.extend_from_slice(&pos.to_le_bytes());
103106
}
@@ -110,8 +113,8 @@ fn gml_to_ewkb(cell: &RefCell<String>, coll: &Vec<Geometry>) {
110113
}
111114
}
112115

113-
fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, colspec: &'a [Yaml]) -> Table<'a> {
114-
let mut table = Table::new(rowpath, outfile, filemode);
116+
fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, skip: Option<&'a str>, colspec: &'a [Yaml]) -> Table<'a> {
117+
let mut table = Table::new(rowpath, outfile, filemode, skip);
115118
for col in colspec {
116119
let name = col["name"].as_str().expect("Column has no 'name' entry in configuration file");
117120
let colpath = col["path"].as_str().expect("Column has no 'path' entry in configuration file");
@@ -121,7 +124,7 @@ fn add_table<'a>(rowpath: &str, outfile: Option<&str>, filemode: &str, colspec:
121124
true => None,
122125
false => {
123126
let file = col["file"].as_str().expect("Subtable has no 'file' entry");
124-
Some(add_table(&path, Some(&file), filemode, col["cols"].as_vec().expect("Subtable 'cols' entry is not an array")))
127+
Some(add_table(&path, Some(&file), filemode, skip, col["cols"].as_vec().expect("Subtable 'cols' entry is not an array")))
125128
}
126129
};
127130
let filter: Option<Regex> = match col["filt"].as_str() {
@@ -177,6 +180,7 @@ fn main() -> std::io::Result<()> {
177180
let mut buf = Vec::new();
178181
let mut fullcount = 0;
179182
let mut filtercount = 0;
183+
let mut skipcount = 0;
180184

181185
let rowpath = config["path"].as_str().expect("No valid 'path' entry in configuration file");
182186
let colspec = config["cols"].as_vec().expect("No valid 'cols' array in configuration file");
@@ -185,11 +189,13 @@ fn main() -> std::io::Result<()> {
185189
true => "truncate",
186190
false => config["mode"].as_str().expect("Invalid 'mode' entry in configuration file")
187191
};
188-
let maintable = add_table(rowpath, outfile, filemode, colspec);
192+
let skip = config["skip"].as_str();
193+
let maintable = add_table(rowpath, outfile, filemode, skip, colspec);
189194
let mut tables: Vec<&Table> = Vec::new();
190195
let mut table = &maintable;
191196

192197
let mut filtered = false;
198+
let mut skipped = false;
193199
let mut xmltotext = false;
194200
let mut text = String::new();
195201
let mut gmltoewkb = false;
@@ -201,12 +207,35 @@ fn main() -> std::io::Result<()> {
201207
Ok(Event::Start(ref e)) => {
202208
path.push('/');
203209
path.push_str(reader.decode(e.name()).unwrap());
204-
if filtered { continue; }
205-
if xmltotext {
210+
if filtered || skipped { continue; }
211+
if path == table.skip {
212+
skipped = true;
213+
continue;
214+
}
215+
else if xmltotext {
206216
text.push_str(&format!("<{}>", &e.unescape_and_decode(&reader).unwrap()));
207217
continue;
208218
}
209219
else if gmltoewkb {
220+
match reader.decode(e.name()) {
221+
Err(_) => (),
222+
Ok(tag) => match tag {
223+
"gml:Point" => {
224+
gmlcoll.push(Geometry::new(1));
225+
gmlcoll.last_mut().unwrap().rings.push(Vec::new());
226+
},
227+
"gml:LineString" => gmlcoll.push(Geometry::new(2)),
228+
"gml:Polygon" => gmlcoll.push(Geometry::new(3)),
229+
"gml:MultiPolygon" => (),
230+
"gml:polygonMember" => (),
231+
"gml:exterior" => (),
232+
"gml:interior" => (),
233+
"gml:LinearRing" => gmlcoll.last_mut().unwrap().rings.push(Vec::new()),
234+
"gml:posList" => gmlpos = true,
235+
"gml:pos" => gmlpos = true,
236+
_ => eprintln!("GML type {} not supported", tag)
237+
}
238+
}
210239
for res in e.attributes() {
211240
match res {
212241
Err(_) => (),
@@ -239,21 +268,6 @@ fn main() -> std::io::Result<()> {
239268
}
240269
}
241270
}
242-
match reader.decode(e.name()) {
243-
Err(_) => (),
244-
Ok(tag) => match tag {
245-
"gml:Point" => gmlcoll.push(Geometry::new(1)),
246-
"gml:LineString" => gmlcoll.push(Geometry::new(2)),
247-
"gml:Polygon" => gmlcoll.push(Geometry::new(3)),
248-
"gml:MultiPolygon" => (),
249-
"gml:polygonMember" => (),
250-
"gml:exterior" => (),
251-
"gml:interior" => (),
252-
"gml:LinearRing" => gmlcoll.last_mut().unwrap().rings.push(Vec::new()),
253-
"gml:posList" => gmlpos = true,
254-
_ => eprintln!("GML type {} not supported", tag)
255-
}
256-
}
257271
continue;
258272
}
259273
else if path == table.path {
@@ -311,7 +325,7 @@ fn main() -> std::io::Result<()> {
311325
}
312326
},
313327
Ok(Event::Text(ref e)) => {
314-
if filtered { continue; }
328+
if filtered || skipped { continue; }
315329
if xmltotext {
316330
text.push_str(&e.unescape_and_decode(&reader).unwrap());
317331
continue;
@@ -335,6 +349,9 @@ fn main() -> std::io::Result<()> {
335349
break;
336350
}
337351
},
352+
Some("first") => {
353+
break;
354+
},
338355
Some("append") => {
339356
if !table.columns[i].value.borrow().is_empty() { table.columns[i].value.borrow_mut().push(','); }
340357
},
@@ -385,6 +402,10 @@ fn main() -> std::io::Result<()> {
385402
if !tables.is_empty() { table = tables.pop().unwrap(); }
386403
}
387404
}
405+
else if path == table.skip {
406+
skipped = false;
407+
skipcount += 1;
408+
}
388409
let i = path.rfind('/').unwrap();
389410
let tag = path.split_off(i);
390411
if xmltotext {
@@ -402,7 +423,7 @@ fn main() -> std::io::Result<()> {
402423
}
403424
}
404425
else if gmltoewkb {
405-
if gmlpos && (tag == "/gml:posList") { gmlpos = false; }
426+
if gmlpos && ((tag == "/gml:pos") || (tag == "/gml:posList")) { gmlpos = false; }
406427
for i in 0..table.columns.len() {
407428
if path == table.columns[i].path {
408429
gmltoewkb = false;
@@ -419,6 +440,11 @@ fn main() -> std::io::Result<()> {
419440
}
420441
buf.clear();
421442
}
422-
eprintln!("{} rows processed in {} seconds{}", fullcount-filtercount, start.elapsed().as_secs(), match filtercount { 0 => "".to_owned(), n => format!(" ({} filtered)", n) });
443+
eprintln!("{} rows processed in {} seconds{}{}",
444+
fullcount-filtercount-skipcount,
445+
start.elapsed().as_secs(),
446+
match filtercount { 0 => "".to_owned(), n => format!(" ({} filtered)", n) },
447+
match skipcount { 0 => "".to_owned(), n => format!(" ({} skipped)", n) }
448+
);
423449
Ok(())
424450
}

0 commit comments

Comments
 (0)