Skip to content

Commit 7816da7

Browse files
committed
add validator page and api route
1 parent 1d4f1dc commit 7816da7

File tree

10 files changed

+617
-5
lines changed

10 files changed

+617
-5
lines changed

app/api_schemas.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,21 @@
11
from enum import Enum as PyEnum
2+
import os
3+
import json
24

35
from apiflask import Schema, validators
4-
from apiflask.fields import UUID, Boolean, DateTime, Dict, Enum, Integer, List, String
6+
from apiflask.fields import (
7+
UUID,
8+
Boolean,
9+
DateTime,
10+
Dict,
11+
Enum,
12+
Integer,
13+
List,
14+
String,
15+
URL,
16+
)
17+
import marshmallow
18+
from marshmallow import ValidationError, validate
519

620
from shared.constants import (
721
ACTION_VALUES,
@@ -14,6 +28,8 @@
1428
SOURCE_TYPE_VALUES,
1529
)
1630

31+
IS_PROD = os.getenv("FLASK_ENV") == "production"
32+
1733

1834
def _to_enum(name, values):
1935
"""Make an iterable of values into a Python enum.
@@ -117,3 +133,59 @@ class SourceInfo(Schema):
117133
source_type = Enum(SOURCE_TYPE_ENUM, required=True)
118134
notification_frequency = Enum(NOTIFICATION_FREQUENCY_ENUM, required=True)
119135
collection_parent_url = String()
136+
137+
138+
class ValidatorInfo(Schema):
    """Request body accepted by the catalog validation endpoint.

    ``fetch_method`` selects where the catalog comes from: ``url`` requires a
    non-empty ``url`` and ``paste`` requires ``json_text`` holding text that
    ``json.loads`` can parse.
    """

    schema = String(
        required=True,
        validate=validators.OneOf(
            ["dcatus1.1: federal dataset", "dcatus1.1: non-federal dataset"]
        ),
    )
    fetch_method = String(
        required=True,
        validate=validators.OneOf(["url", "paste"]),
    )
    # Whether a top-level domain is required follows the module-level IS_PROD.
    url = URL(require_tld=IS_PROD)
    json_text = String()

    @marshmallow.validates_schema
    def validate_url(self, data, **kwargs):
        """Reject a ``url`` fetch that carries no URL."""
        wants_url = data.get("fetch_method") == "url"
        if wants_url and not data.get("url"):
            raise ValidationError("'url' field is required when fetch_method is 'url'")

    @marshmallow.validates_schema
    def validate_json_text(self, data, **kwargs):
        """Reject a ``paste`` fetch whose ``json_text`` is missing or not JSON."""
        if data.get("fetch_method") != "paste":
            return

        pasted = data.get("json_text")
        if not pasted:
            raise ValidationError(
                "'json_text' field is required when fetch_method is 'paste'"
            )

        try:
            json.loads(pasted)
        except json.JSONDecodeError:
            raise ValidationError("Invalid JSON")
178+
179+
180+
class ValidationResultSchema(Schema):
    """Successful validation response.

    ``validation_errors`` is a required list; each item is itself a list of
    exactly two strings.
    """

    validation_errors = List(
        List(String(), validate=validate.Length(equal=2)),
        required=True,
    )
188+
189+
190+
class ValidationErrorResponseSchema(Schema):
    """Error response body: a single required ``error`` message string."""

    error = String(required=True)

app/forms.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import ast
22
import os
33
import re
4+
import json
45

56
from flask_wtf import FlaskForm
67
from wtforms import BooleanField, SelectField, StringField, SubmitField, TextAreaField
@@ -18,6 +19,18 @@
1819
is_prod = os.getenv("FLASK_ENV") == "production"
1920

2021

22+
def validate_json_format(form, field):
    """Custom WTForms validator that checks the field data is valid JSON.

    Args:
        form: The form that owns ``field``; not used by the check.
        field: Field whose ``data`` is parsed with ``json.loads``.

    Raises:
        ValidationError: If ``field.data`` cannot be parsed as JSON, including
            when it is not text at all (for example ``None``).
    """
    try:
        json.loads(field.data)
    except (TypeError, json.JSONDecodeError) as err:
        # json.loads raises TypeError, not JSONDecodeError, for non-text input
        # such as None; both mean the field holds no usable JSON.
        raise ValidationError(
            "Invalid JSON format. Please ensure the data is a valid JSON string."
        ) from err
32+
33+
2134
def validate_email_list(form, field):
2235
emails = field.data
2336
for email in emails:
@@ -165,3 +178,53 @@ class HarvestTriggerForm(FlaskForm):
165178
class OrganizationTriggerForm(FlaskForm):
166179
edit = SubmitField("Edit")
167180
delete = SubmitField("Delete")
181+
182+
183+
def url_paste_validate(form, field):
    """Require the input that matches the form's selected fetch method.

    The validator is attached to both the ``url`` and ``json_text`` fields.
    Each requirement is reported only on the field it concerns, so a missing
    URL is not also reported on ``json_text`` and a JSON problem is not also
    reported on ``url``.

    Args:
        form: Form exposing ``fetch_method``, ``url`` and ``json_text`` fields.
        field: The field currently being validated.

    Returns:
        True when no requirement is violated for ``field``.

    Raises:
        ValidationError: If the URL is missing for a "url" fetch, or the JSON
            input is missing or not parseable for a "paste" fetch.
    """
    method = form.fetch_method.data

    if method == "url":
        # The URL requirement belongs to the url field; skip it when this
        # validator is running for json_text.
        if field is not form.json_text and not form.url.data:
            raise ValidationError("URL is required.")
    elif method == "paste":
        # The JSON requirement belongs to json_text; skip it for url.
        if field is form.url:
            return True
        if not form.json_text.data:
            raise ValidationError("JSON input is required.")
        validate_json_format(form, form.json_text)

    return True
194+
195+
196+
class ValidatorForm(FlaskForm):
    """Form for the catalog validator page.

    The user picks a schema and a fetch method, then supplies either a URL or
    pasted JSON text to match.
    """

    schema = SelectField(
        "Schema",
        choices=["dcatus1.1: federal dataset", "dcatus1.1: non-federal dataset"],
        validators=[DataRequired()],
    )
    fetch_method = SelectField(
        "Fetch Method",
        choices=[("url", "Fetch from URL"), ("paste", "Paste JSON")],
        validators=[DataRequired()],
    )
    url = StringField(
        "URL",
        validators=[url_paste_validate],
        filters=[strip_filter],
    )
    json_text = TextAreaField(
        "DCATUS Catalog JSON Input",
        validators=[url_paste_validate],
    )
    submit = SubmitField("Validate")

    def validate_url(self, field):
        """Check the URL's format, but only for a "url" fetch with a value."""
        if self.fetch_method.data != "url" or not field.data:
            return

        try:
            url_check = URL(require_tld=is_prod)
            url_check(self, field)
        except ValidationError:
            # Replace the stock validator message with a short one.
            raise ValidationError("Invalid URL")

app/routes.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,25 @@
5151
QueryInfo,
5252
RecordInfo,
5353
SourceInfo,
54+
ValidatorInfo,
55+
ValidationResultSchema,
56+
ValidationErrorResponseSchema,
5457
)
5558
from .auth import LoginRequiredAuth
5659
from .forms import (
5760
HarvestSourceForm,
5861
HarvestTriggerForm,
5962
OrganizationForm,
6063
OrganizationTriggerForm,
64+
ValidatorForm,
6165
)
6266
from .paginate import Pagination
6367
from .util import (
6468
make_new_org_contract,
6569
make_new_record_error_contract,
6670
make_new_source_contract,
71+
fetch_json_from_url,
72+
validate_records,
6773
)
6874

6975
logger = logging.getLogger("harvest_admin")
@@ -1331,6 +1337,85 @@ def json_builder_query(**kwargs):
13311337
return "Error with query", 400
13321338

13331339

1340+
@api.route("/api/validate", methods=["POST"])
@api.input(ValidatorInfo)
@api.output(ValidationResultSchema, status_code=200)
@api.doc(
    summary="Validate a DCAT catalog against a v1.1 schema",
    description="Downloads or parses a DCATUS catalog and validates each dataset.",
    responses={
        500: {
            "description": "Failed to download or process the catalog",
            "content": {"application/json": {"schema": ValidationErrorResponseSchema}},
        },
    },
)
def validator(json_data):
    """API route for validating v1.1 DCAT-US catalogs.

    Loads the catalog from ``json_data["url"]`` or ``json_data["json_text"]``
    according to ``json_data["fetch_method"]`` and passes it, together with
    ``json_data["schema"]``, to ``validate_records``.

    Returns a 200 JSON response ``{"validation_errors": ...}`` on success. Any
    exception while loading or validating is logged and answered with a 500
    JSON response carrying an ``error`` message.
    """
    # NOTE(review): the url comes from the request and is fetched server-side;
    # confirm fetch_json_from_url restricts which targets may be reached.
    try:
        method = json_data["fetch_method"]
        if method == "url":
            catalog = fetch_json_from_url(json_data["url"])
        elif method == "paste":
            catalog = json.loads(json_data["json_text"])
        else:
            catalog = []

        errors = validate_records(catalog, json_data["schema"])
    except Exception as e:
        logger.error(f"API Validator error :: {repr(e)}")
        failure = {"error": "API Validator error: failed to validate dcatus catalog"}
        return make_response(jsonify(failure), 500)

    return make_response(jsonify({"validation_errors": errors}), 200)
1381+
1382+
1383+
@main.route("/validate/", methods=["GET", "POST"])
def view_validators():
    """View for validating v1.1 DCAT-US catalogs through ``ValidatorForm``.

    On a valid submission the catalog is fetched from the submitted URL or
    parsed from the pasted JSON and passed to ``validate_records``. A failed
    fetch is reported as an error on the ``url`` field and validation is
    skipped, since there is no catalog to validate.

    Renders ``view_validators.html`` with the form, ``data`` holding the
    ``record_errors`` list, and ``submitted``, which is True only when the
    form validated and no error was added afterwards.
    """
    catalog = []
    errors = []
    submitted = False

    form = ValidatorForm()
    if form.validate_on_submit():
        fetch_failed = False
        if form.fetch_method.data == "url":
            try:
                catalog = fetch_json_from_url(form.url.data)
            except Exception as e:
                fetch_failed = True
                form.url.errors.append(str(e))
        elif form.fetch_method.data == "paste":
            catalog = json.loads(form.json_text.data)

        # Don't validate the empty placeholder when the download failed; the
        # fetch error on the url field is what the user needs to see.
        if not fetch_failed:
            errors = validate_records(catalog, form.schema.data)

        if not form.errors:
            submitted = True

    data = {
        "record_errors": errors,
    }

    return render_template(
        "view_validators.html",
        form=form,
        data=data,
        submitted=submitted,
    )
1417+
1418+
13341419
def register_routes(app):
13351420
app.register_blueprint(main)
13361421
app.register_blueprint(api)

app/templates/base.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
<li><a href="{{ url_for('main.organization_list') }}">Organizations</a></li>
3838
<li><a href="{{ url_for('main.harvest_source_list') }}">Harvest Sources</a></li>
3939
<li><a href="{{ url_for('main.view_metrics') }}">Metrics</a></li>
40+
<li><a href="{{ url_for('main.view_validators') }}">Validators</a></li>
4041
{% if session['user'] %}
4142
<li>{{ session['user'] }}</li>
4243
<li><a href="{{ url_for('main.logout') }}">Logout</a></li>

0 commit comments

Comments
 (0)