Skip to content

Commit e3291c1

Browse files
committed
filter all month trigraphs in MGRS
filter all DD/DMS coordinates by resolution -- sub-degree by default will pass
1 parent 05b7687 commit e3291c1

File tree

4 files changed

+81
-45
lines changed

4 files changed

+81
-45
lines changed

src/main/python/opensextant/extractors/xcoord.py

Lines changed: 58 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,6 @@
88
from opensextant.FlexPat import PatternExtractor, RegexPatternManager, PatternMatch
99

1010

11-
# History - 2024 may - MCU ported from XCoord Java
12-
#
13-
#
14-
# TODO: error "precision" scales
15-
# text formatting of normalized coordinate
16-
# complete testing
17-
18-
class XCoord(PatternExtractor):
19-
"""
20-
NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
21-
"""
22-
23-
def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False):
24-
"""
25-
:param cfg: patterns config file.
26-
"""
27-
PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
28-
29-
3011
class ResolutionUncertainty:
3112
UNKNOWN = 100000
3213
REGIONAL = 50000
@@ -55,6 +36,26 @@ class Specificity:
5536
None: 1
5637
}
5738

39+
default_specificity = Specificity.SUBDEG
40+
41+
42+
# History - 2024 may - MCU ported from XCoord Java
43+
#
44+
class XCoord(PatternExtractor):
45+
"""
46+
NOTE: a port of XCoord java (org.opensextant.extractors.xcoord, in Xponents-Core)
47+
"""
48+
49+
def __init__(self, cfg="geocoord_patterns_py.cfg", debug=False, specificity=Specificity.SUBDEG):
50+
"""
51+
:param cfg: patterns config file.
52+
:param specificity: the abstract level of resolution for validating coordinates, e.g., DEGREE, SUB-DEGREE, etc.
53+
use Specificity enumeration
54+
"""
55+
PatternExtractor.__init__(self, RegexPatternManager(cfg, debug=debug, testing=debug))
56+
global default_specificity
57+
default_specificity = specificity
58+
5859

5960
def hemisphere_factor(sym: str) -> int:
6061
if sym:
@@ -79,12 +80,14 @@ def is_blank(txt: str):
7980
return False
8081
return txt == '' or txt.strip() == ''
8182

82-
def strip(txt:str):
83+
84+
def strip(txt: str):
8385
if txt is None:
8486
# Sorry -- you have to determine if obj is string or not first. None does not count.
8587
return False
8688
return txt.strip()
8789

90+
8891
class Hemisphere:
8992
def __init__(self, axis, slots=None):
9093
self.axis = axis
@@ -178,7 +181,7 @@ def has_subsec(self):
178181
return self.specificity == Specificity.SUBSECOND
179182

180183
def has_symbols(self):
181-
return len(self.symbols) > 1
184+
return len(self.symbols) > 0
182185

183186
def normalize(self):
184187
"""
@@ -327,11 +330,11 @@ def __init__(self, *args, **kwargs):
327330
PatternMatch.__init__(self, *args, **kwargs)
328331
self.case = PatternMatch.UPPER_CASE
329332
self.geodetic = None
330-
self.coordinate = None
331-
self.parsing_err = None
332-
self.lat_ordinate = None
333-
self.lon_ordinate = None
334-
self.filter = None
333+
self.coordinate: Coordinate = None
334+
self.parsing_err: str = None
335+
self.lat_ordinate: DMSOrdinate = None
336+
self.lon_ordinate: DMSOrdinate = None
337+
self.filter: GeocoordFilter = None
335338
self.pattern_family = self.pattern_id.split("-", 1)[0]
336339

337340
def __str__(self):
@@ -358,6 +361,22 @@ def _make_coordinate(self):
358361
self.coordinate = Coordinate(None, lat=LL.lat, lon=LL.lon)
359362
# These are parsed by UTM and MGRS libraries, so coordinate is assumed valid.
360363

364+
def filter_by_resolution(self):
365+
""" Check specificity of ordinates -- as parsed, whether both LAT and LON have the minimum level of detail
366+
367+
40N -- could be "40 North"
368+
+40.0000 -- also "40 North", but precision is specified to 4sigfig.
369+
+40:00:00 -- well, could also be an hour marker ~ 40 hours
370+
371+
:return: TRUE if coordinate is specific and resolution is high enough.
372+
"""
373+
if not self.lat_ordinate or not self.lon_ordinate:
374+
# If unset, we'll simply filter OUT
375+
return False
376+
lat_valid = self.lat_ordinate.specificity >= default_specificity
377+
lon_valid = self.lon_ordinate.specificity >= default_specificity
378+
return lat_valid and lon_valid
379+
361380

362381
class GeocoordFilter:
363382
def filter_out(self, m: GeocoordMatch) -> tuple:
@@ -369,7 +388,9 @@ def __init__(self):
369388
GeocoordFilter.__init__(self)
370389
self.date_formats = ["DDMMMYYYY", "DMMMYYHHmm", "DDMMMYYHHmm", "DDMMMYY", "DMMMYY", "HHZZZYYYY"]
371390
self.sequences = ["1234", "123456", "12345678", "1234567890"]
372-
self.stop_terms = { "PER", "SEC", "UTC", "GMT", "GAL"}
391+
self.stop_terms = {"PER", "SEC", "UTC", "GMT", "GAL", "USC", "CAN",
392+
"JAN", "FEB", "MAR", "APR", "MAY", "JUN",
393+
"JUL", "AUG", "SEP", "OCT", "NOV", "DEC"}
373394
self.today = arrow.utcnow()
374395
self.YEAR = self.today.date().year
375396
self.YY = self.YEAR - 2000
@@ -392,6 +413,8 @@ def filter_out(self, mgrs: GeocoordMatch) -> tuple:
392413

393414
if not (mgrs.text.isupper() and len(mgrs.text.replace(" ", "")) > 6):
394415
return True, "lexical"
416+
if "\t" in mgrs.text or "\n" in mgrs.text:
417+
return True, "format-ws"
395418
for term in self.stop_terms:
396419
if term in mgrs.textnorm:
397420
return True, "measure"
@@ -430,6 +453,11 @@ def filter_out(self, dms: GeocoordMatch) -> tuple:
430453
Easy filter -- if punctuation matches, this is an easy pattern to ignore.
431454
:return: True if filtered out, false positive.
432455
"""
456+
if dms.is_valid:
457+
if not dms.filter_by_resolution():
458+
# Not valid -- or at least not meeting the user's level of specificity.
459+
return True
460+
433461
if dms.is_valid:
434462
if dms.text[0].isalpha():
435463
return False, None
@@ -581,16 +609,18 @@ def validate(self):
581609
55.60, 80.11 -- not valid
582610
N55.60, W80.11 -- valid
583611
+55.60, -80.11 -- valid
612+
S20 E33 -- not valid, by default looking for sub-degree resolution.
584613
585614
Validate also if the coordinate is a valid range for Lat/Lon.
586615
"""
587616
if not self.is_valid:
588617
return
618+
589619
lath = self.lat_ordinate.hemi
590620
lonh = self.lon_ordinate.hemi
591621
valid_hemi = lath and lonh and lath.is_alpha() and lonh.is_alpha()
592622
valid_sym = self.lat_ordinate.has_symbols() or self.lon_ordinate.has_symbols()
593-
self.is_valid = valid_hemi or valid_sym
623+
self.is_valid = (valid_hemi or valid_sym) and self.filter_by_resolution()
594624

595625
self.filtered_out = not self.is_valid
596626

src/main/python/opensextant/resources/geocoord_patterns_py.cfg

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,8 @@
562562
#TEST DD 01 N42.3°, W102.4°
563563
#TEST DD 01 N98.3°, W192.4° #FAIL test out lon deg validation.
564564
#TEST DD 01 N98.3°, W292.4° #FAIL test out lon deg validation.
565+
#TEST DD 01 S20 E33 # FAIL does not meet minimum specificity
566+
#TEST DD 01 S20° E33° # FAIL does not meet minimum specificity
565567

566568
// FORM: DD-xx, Decimal Deg, Postpending Hemisphere (a) DD.DDDDDD°H DDD.DDDDDD°H, optional deg symbol
567569
#RULE DD 02 \b<decDegLat><degSym>?<hemiLat><latlonSep3><decDegLon><degSym>?<hemiLon>\b
@@ -597,26 +599,28 @@
597599

598600
// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) HDD° HDDD°, required deg symbol
599601
#RULE DD 05 \b<hemiLatPre>?<degLat><degSym><latlonSep3><hemiLonPre>?<degLon><degSym>
600-
#TEST DD 05 N42°, W102°
602+
#TEST DD 05 N42°, W102° # FAIL - resolution
601603
#TEST DD 05 N42W102 #FAIL -- hemispheres are right, but text is too short and lack of separator leads to ambiguity.
602-
#TEST DD 05 -42°, 102°
603-
#TEST DD 05 N 42°, W 102°
604-
#TEST DD 05 -42°, +102°
605-
#TEST DD 05 -42°, 102°
606-
#TEST DD 05 -42°, 102°
607-
#TEST DD 05 +42°, -102°
604+
#TEST DD 05 -42°, 102° # FAIL - resolution
605+
#TEST DD 05 N 42°, W 102° # FAIL - resolution
606+
#TEST DD 05 -42°, +102° # FAIL - resolution
607+
#TEST DD 05 -42°, 102° # FAIL - resolution
608+
#TEST DD 05 -42°, 102° # FAIL - resolution
609+
#TEST DD 05 +42°, -102° # FAIL - resolution
610+
#TEST DD 01 S20 E33 # FAIL does not meet minimum specificity
611+
#TEST DD 01 S20° E33° # FAIL does not meet minimum specificity
608612

609613
#RULE DD 06 \b<degLat><degSym>?\s?<hemiLat><latlonSep3><degLon><degSym>?\s?<hemiLon>\b
610-
#TEST DD 06 42°N, 102°N FAIL
611-
#TEST DD 06 42° N, 102° W
612-
#TEST DD 06 42 N, 102 W
613-
#TEST DD 06 42N x 102W
614-
#TEST DD 06 00 N 130 WA FAIL
614+
#TEST DD 06 42°N, 102°N # FAIL - two North hemispheres
615+
#TEST DD 06 42° N, 102° W # FAIL - resolution
616+
#TEST DD 06 42 N, 102 W # FAIL - resolution
617+
#TEST DD 06 42N x 102W # FAIL - resolution
618+
#TEST DD 06 00 N 130 WA # FAIL
615619

616620
#RULE DD 07 \b<hemiLat>\s?<degLat><latlonSep3><hemiLon>\s?<degLon>\b
617-
#TEST DD 07 N42, W102
618-
#TEST DD 07 N42 x W102
619-
#TEST DD 07 N 42 W 102
621+
#TEST DD 07 N42, W102 # FAIL - resolution
622+
#TEST DD 07 N42 x W102 # FAIL - resolution
623+
#TEST DD 07 N 42 W 102 # FAIL - resolution
620624

621625
//#RULE DD 08 \b<hemiLatSign>?<decDegLat>,\s?<hemiLonSign>?<decDegLon>
622626
//#TEST DD 08 text 54.67, -117 text # FAIL - no decimal longitude

src/main/python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
setup(
1616
name='opensextant',
17-
version='1.6.8',
17+
version='1.6.9',
1818

1919
description='OpenSextant APIs and Utilities',
2020
long_description=long_description,

src/main/python/tests/test_xcoord.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
for m in matches:
88
print(m, m.filtered_out)
99

10-
dms = ["'18 51.1S 34 38.8W'",
10+
dms = ["S20 E33",
11+
"42.3° x 102.4°",
12+
"'18 51.1S 34 38.8W'",
1113
"08 00.4S 30 35.2W", # DM
1214
"08.00.4S 30.35.2W", # DMS pattern
1315
"08\n00.4S 30 35.2W",

0 commit comments

Comments
 (0)