Skip to content

Commit bb5c5dc

Browse files
authored
split topic validation to be subscription or publication driven (#14)
* split topic validation to be subscription or publication driven
* update README
* cache IANA TLDs
1 parent ec7193a commit bb5c5dc

File tree

5 files changed

+156
-17
lines changed

5 files changed

+156
-17
lines changed

README.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,20 +49,31 @@ pywis-topics --version
4949
pywis-topics bundle sync
5050
```
5151

52-
### Listing and validation
52+
### Listing and validating topics for subscription
5353

5454
```bash
5555
# validate a WIS2 topic hierarchy
56-
pywis-topics topic validate origin/a/wis2/ca-eccc-msc
56+
pywis-topics topic validate subscription origin/a/wis2/ca-eccc-msc
5757

5858
# validate a WIS2 topic hierarchy in no-strict mode
59-
pywis-topics topic validate --no-strict origin/a/wis2/fake-centre-id/data/core
59+
pywis-topics topic validate subscription --no-strict origin/a/wis2/fake-centre-id/data/core
6060

6161
# list children of a given WIS2 topic hierarchy level
6262
pywis-topics topic list wis2/a
6363

6464
# validate a WIS2 topic hierarchy with wildcards (needs no-strict mode)
65-
pywis-topics topic validate origin/a/wis2/+/data/core --no-strict
65+
pywis-topics topic validate subscription origin/a/wis2/+/data/core --no-strict
66+
```
67+
68+
### Validating topics for publication
69+
70+
```bash
71+
# validate a WIS2 topic hierarchy for publication (wildcards are not allowed)
72+
pywis-topics topic validate publication origin/a/wis2/ca-eccc-msc
73+
pywis-topics topic validate publication origin/a/wis2/ca-eccc-msc/ocean
74+
pywis-topics topic validate publication origin/a/wis2/us-noaa-nws/data/core/weather/surface-based-observations/synop
75+
# validate a WIS2 topic hierarchy in no-strict mode
76+
pywis-topics topic validate publication --no-strict origin/a/wis2/fake-centre-id/data/core
6677
```
6778

6879
### Centre identification validation

pywis_topics/bundle.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def sync_bundle() -> None:
7878
shutil.copyfileobj(src, dest)
7979

8080
LOGGER.debug('Downloading IANA TLDs')
81-
IANA_URL = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
81+
IANA_URL = 'https://wmo-im.github.io/wis2-topic-hierarchy/iana-tlds/tlds-alpha-by-domain.txt' # noqa
8282
iana_file = WIS2_TOPIC_HIERARCHY_DIR_TEMP / 'tlds-alpha-by-domain.txt'
8383
with iana_file.open('wb') as fh:
8484
fh.write(urlopen_(f'{IANA_URL}').read())

pywis_topics/pygeoapi_plugin.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,16 @@
138138
'links': WIS2_TOPIC_HIERARCHY_LINKS,
139139
'inputs': {
140140
'topic': WIS2_TOPIC_HIERARCHY_INPUT_TOPIC,
141+
'strict': {
142+
'type': 'boolean',
143+
'default': False,
144+
'description': 'Validate in strict mode (default true)'
145+
},
146+
'publication': {
147+
'type': 'boolean',
148+
'default': False,
149+
'description': 'Topic is publication-based (default false)'
150+
}
141151
},
142152
'outputs': {
143153
'result': {
@@ -223,6 +233,8 @@ def execute(self, data):
223233
response = None
224234
mimetype = 'application/json'
225235
topic = data.get('topic')
236+
publication = data.get('publication', False)
237+
strict = data.get('strict', True)
226238

227239
if topic is None:
228240
msg = 'Missing topic'
@@ -231,8 +243,11 @@ def execute(self, data):
231243

232244
LOGGER.debug('Querying topic')
233245
th = TopicHierarchy()
246+
topic_is_valid = th.validate(
247+
topic, strict=strict, publication=publication)
248+
234249
response = {
235-
'topic_is_valid': th.validate(topic)
250+
'topic_is_valid': topic_is_valid
236251
}
237252
return mimetype, response
238253

pywis_topics/topics.py

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,13 +125,15 @@ def list_children(self, topic_hierarchy: str = None) -> List[str]:
125125
return matches
126126

127127
def validate(self, topic_hierarchy: str = None,
128-
strict: bool = True) -> bool:
128+
strict: bool = True, publication: bool = False) -> bool:
129129
"""
130130
Validates a topic hierarchy
131131
132132
:param topic_hierarchy: `str` of topic hierarchy
133133
:param strict: `bool` of whether to perform strict validation,
134134
including centre-id
135+
:param publication: `bool` of whether to perform validation
136+
for publication
135137
136138
:returns: `bool` of whether topic hierarchy is valid
137139
"""
@@ -149,6 +151,27 @@ def validate(self, topic_hierarchy: str = None,
149151
core_tokens = all_tokens[:6]
150152
esd_subtopic = '/'.join(all_tokens[6:])
151153

154+
if publication:
155+
if any(c in topic_hierarchy for c in ['#', '+']):
156+
msg = 'Invalid characters for publication'
157+
LOGGER.warning(msg)
158+
return False
159+
160+
if len(core_tokens) < 5:
161+
msg = 'Not enough tokens for publication'
162+
LOGGER.warning(msg)
163+
return False
164+
165+
if core_tokens[4] not in ['data', 'metadata']:
166+
msg = 'Invalid token for publication (must be data or metadata)' # noqa
167+
LOGGER.warning(msg)
168+
return False
169+
170+
if core_tokens[-2] == 'data':
171+
if esd_subtopic in [None, '']:
172+
LOGGER.debug('Earth system discipline subtopic is empty')
173+
return False
174+
152175
LOGGER.debug(f'Core tokens: {core_tokens}')
153176
LOGGER.debug(f'Earth system discipline subtopic: {esd_subtopic}')
154177
LOGGER.debug('Validating core tokens')
@@ -296,6 +319,12 @@ def topic():
296319
pass
297320

298321

322+
@click.group()
323+
def validate():
324+
"""Topic hierarchy validation utilities"""
325+
pass
326+
327+
299328
@click.command('list')
300329
@click.pass_context
301330
@get_cli_common_options
@@ -322,8 +351,8 @@ def list_(ctx, topic_hierarchy, logfile, verbosity):
322351
@click.option('--strict/--no-strict', default=True,
323352
help='Validate in strict mode')
324353
@click.argument('topic-hierarchy')
325-
def validate(ctx, topic_hierarchy, logfile, verbosity, strict=True):
326-
"""Validate topic hierarchy"""
354+
def subscription(ctx, topic_hierarchy, logfile, verbosity, strict=True):
355+
"""Validate topic hierarchy for subscription"""
327356

328357
setup_logger(verbosity, logfile)
329358

@@ -335,5 +364,24 @@ def validate(ctx, topic_hierarchy, logfile, verbosity, strict=True):
335364
click.echo('Invalid')
336365

337366

367+
@click.command()
368+
@click.pass_context
369+
@get_cli_common_options
370+
@click.argument('topic-hierarchy')
371+
def publication(ctx, topic_hierarchy, logfile, verbosity):
372+
"""Validate topic hierarchy for publication"""
373+
374+
setup_logger(verbosity, logfile)
375+
376+
th = TopicHierarchy()
377+
378+
if th.validate(topic_hierarchy, publication=True):
379+
click.echo('Valid')
380+
else:
381+
click.echo('Invalid')
382+
383+
338384
topic.add_command(list_)
339385
topic.add_command(validate)
386+
validate.add_command(subscription)
387+
validate.add_command(publication)

tests/run_tests.py

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,16 @@ def tearDown(self):
7575
"""return to pristine state"""
7676
pass
7777

78-
def test_validate(self):
78+
def test_validate_subscription(self):
7979
value = None
8080
with self.assertRaises(ValueError):
8181
_ = self.th.validate(value)
8282

83+
valid_topics = [
84+
'cache/a/wis2',
85+
'cache/a/wis2/ca-eccc-msc/data/core',
86+
]
87+
8388
invalid_topics = [
8489
'invalid.topic.hierarchy',
8590
'ORIGIN/A/wis2',
@@ -88,17 +93,12 @@ def test_validate(self):
8893
'a/wis2'
8994
]
9095

91-
valid_topics = [
92-
'cache/a/wis2',
93-
'cache/a/wis2/ca-eccc-msc/data/core',
94-
]
96+
for valid_topic in valid_topics:
97+
self.assertTrue(self.th.validate(valid_topic))
9598

9699
for invalid_topic in invalid_topics:
97100
self.assertFalse(self.th.validate(invalid_topic))
98101

99-
for valid_topic in valid_topics:
100-
self.assertTrue(self.th.validate(valid_topic))
101-
102102
value = 'cache/a/wis2/fake-centre-id/data/core'
103103
self.assertTrue(self.th.validate(value, strict=False))
104104

@@ -150,6 +150,71 @@ def test_validate(self):
150150
self.assertTrue(self.th.validate(value, strict=False))
151151
self.assertFalse(self.th.validate(value))
152152

153+
def test_validate_publication(self):
154+
value = None
155+
with self.assertRaises(ValueError):
156+
_ = self.th.validate(value)
157+
158+
valid_topics = [
159+
'origin/a/wis2/kz-kazhydromet/data/core/weather/surface-based-observations/synop', # noqa
160+
'origin/a/wis2/uk-metoffice/data/core/ocean/surface-based-observations/drifting-ocean-profilers', # noqa
161+
'origin/a/wis2/ca-eccc-msc/data/core/ocean/experimental' # noqa
162+
]
163+
164+
invalid_topics = [
165+
'invalid.topic.hierarchy',
166+
'ORIGIN/A/wis2',
167+
'origin/a/wis2/ca-é',
168+
'invalid/topic/hierarchy',
169+
'a/wis2',
170+
'cache/a/wis2',
171+
'origin/a/wis2/ca-eccc-msc/data/core',
172+
'cache/a/wis2/ca-eccc-msc/data/core',
173+
'cache/a/wis2/ca-eccc-msc/data/core/weather/surface-based-observations1' # noqa
174+
]
175+
176+
for valid_topic in valid_topics:
177+
self.assertTrue(self.th.validate(valid_topic, publication=True))
178+
179+
for invalid_topic in invalid_topics:
180+
self.assertFalse(self.th.validate(invalid_topic, publication=True))
181+
182+
value = 'cache/a/wis2/fake-centre-id/data/core'
183+
self.assertFalse(self.th.validate(value, strict=False,
184+
publication=True))
185+
186+
value = 'cache/a/+'
187+
self.assertFalse(self.th.validate(value, strict=False,
188+
publication=True))
189+
190+
value = 'cache/a/#'
191+
self.assertFalse(self.th.validate(value, strict=False,
192+
publication=True))
193+
194+
value = 'cache/a/wis2/+/data/core/#'
195+
self.assertFalse(self.th.validate(value, strict=False,
196+
publication=True))
197+
198+
value = 'cache/a/wis2/+/data/core/weather/#'
199+
self.assertFalse(self.th.validate(value, strict=False,
200+
publication=True))
201+
202+
value = 'cache/a/wis2/+/data/#/weather'
203+
self.assertFalse(self.th.validate(value, publication=True))
204+
205+
value = 'cache/a/wis2/+/data/core/weather/surface-based-observations'
206+
self.assertFalse(self.th.validate(value, strict=False,
207+
publication=True))
208+
209+
value = 'origin/a/wis2/ca-eccc-msc/data/core/ocean'
210+
self.assertTrue(self.th.validate(value, publication=True))
211+
212+
value = 'cache/a/wis2/ca-eccc-msc/data/core/ocean'
213+
self.assertTrue(self.th.validate(value, publication=True))
214+
215+
value = 'cache/a/wis2/io-wis2dev-11-test/data/core/ocean'
216+
self.assertTrue(self.th.validate(value, publication=True))
217+
153218
def test_list_children(self):
154219
value = None
155220
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)