Skip to content

Commit be9c66d

Browse files
more validators
1 parent efefd1e commit be9c66d

File tree

11 files changed

+149
-5
lines changed

11 files changed

+149
-5
lines changed

.github/workflows/scripts/icon_path_validator.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,19 @@
22
import pathlib
33
import string
44
from typing import Final
5+
from xml.etree import ElementTree
56

67

78
# Error type of this script; in the visible code it is raised when a file in the
# images directory is not listed among the icons collected from the JSON files.
class InvalidStructureException(Exception):
89
def __init__(self, msg: str):
910
super().__init__(msg)
1011

1112

13+
# Raised by IconValidator._validate_svg when an ".svg" file cannot be parsed as XML.
class InvalidSVGException(Exception):
14+
def __init__(self, msg: str):
15+
super().__init__(msg)
16+
17+
1218
class IconValidator:
1319
def __init__(self):
1420
self._SOURCE_DIR: Final[str] = "src"
@@ -23,6 +29,16 @@ def validate(self) -> None:
2329
for file in self._FULL_IMAGES_DIR.iterdir():
2430
if file.name not in json_icons:
2531
raise InvalidStructureException(f"{file.name} must be used, {file} isn't used!")
32+
if file.name.lower().endswith(".svg"):
33+
self._validate_svg(file)
34+
35+
def _validate_svg(self, file: pathlib.Path) -> None:
36+
try:
37+
with file.open("r", encoding="utf8") as f:
38+
content: str = f.read()
39+
ElementTree.fromstring(content)
40+
except ElementTree.ParseError as e:
41+
raise InvalidSVGException(f"Invalid SVG '{file.name}': {e}")
2642

2743
def get_json_icons(self) -> set[str]:
2844
letters: list[str] = list(string.ascii_lowercase)
.github/workflows/scripts/schema_validator.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import json
2+
import pathlib
3+
from typing import Final
4+
5+
from jsonschema import validate, ValidationError
6+
7+
8+
# Raised by SchemaValidator.validate when a technology file does not conform to the schema.
class SchemaValidationException(Exception):
9+
def __init__(self, msg: str):
10+
super().__init__(msg)
11+
12+
13+
class SchemaValidator:
    """Validate every technology JSON file against the JSON schema.

    All paths are relative to the current working directory: the schema is
    read from ``schema.json`` and the technology files from
    ``src/technologies``.
    """

    def __init__(self):
        self._SOURCE_DIR: Final[str] = "src"
        self._TECH_DIR: Final[str] = "technologies"
        self._FULL_TECH_DIR: Final[pathlib.Path] = pathlib.Path(self._SOURCE_DIR).joinpath(self._TECH_DIR)
        self._SCHEMA_FILE: Final[pathlib.Path] = pathlib.Path("schema.json")

    def validate(self) -> None:
        """Validate each ``*.json`` file in the technologies directory.

        Files are processed in sorted order and validation stops at the first
        file that violates the schema.

        Raises:
            FileNotFoundError: If the schema file does not exist.
            SchemaValidationException: If a technology file does not conform
                to the schema; the message names the file, the schema error
                and the location inside the document. The originating
                ``ValidationError`` is attached as ``__cause__``.
        """
        if not self._SCHEMA_FILE.is_file():
            raise FileNotFoundError(f"Schema file '{self._SCHEMA_FILE}' not found!")
        with self._SCHEMA_FILE.open("r", encoding="utf8") as f:
            schema: dict = json.load(f)
        for tech_file in sorted(self._FULL_TECH_DIR.iterdir()):
            if not tech_file.name.endswith(".json"):
                continue
            with tech_file.open("r", encoding="utf8") as f:
                technologies: dict = json.load(f)
            try:
                validate(instance=technologies, schema=schema)
            except ValidationError as e:
                # absolute_path is empty when the error concerns the document root.
                path: str = " -> ".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
                # Chain explicitly so the traceback shows the schema error as the
                # direct cause rather than as an error "during handling".
                raise SchemaValidationException(f"{tech_file.name}: {e.message} (at {path})") from e
35+
36+
37+
# Script entry point: run the schema validation when this file is executed directly.
if __name__ == '__main__':
38+
SchemaValidator().validate()

.github/workflows/scripts/technology_validator.py

Lines changed: 62 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ def __init__(self, msg: str):
7474
super().__init__(msg)
7575

7676

77+
# Reported by ReferenceValidator when a technology references a name that is not a known technology.
class TechNotFoundException(Exception):
78+
def __init__(self, msg: str):
79+
super().__init__(msg)
80+
81+
82+
# Reported by URLValidator when a string value does not match the http(s) URL pattern.
class InvalidURLException(Exception):
83+
def __init__(self, msg: str):
84+
super().__init__(msg)
85+
86+
7787
class AbstractValidator:
7888
def __init__(self, required: bool = False):
7989
self._required = required
@@ -185,6 +195,27 @@ def get_type(self) -> list[Type]:
185195
return [str]
186196

187197

198+
class URLValidator(StringValidator):
    """String validator that additionally requires an absolute http(s) URL.

    The accepted shape is ``http://`` or ``https://``, a dotted host name made
    of letters, digits and hyphens, an optional ``:port`` and an optional path
    starting with ``/`` that contains no whitespace.
    """

    def __init__(self, required: bool = False):
        super().__init__(required)
        self._url_pattern: Final[re.Pattern] = re.compile(
            r"^https?://"
            r"(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+"
            r"[A-Za-z0-9-]{2,}"
            r"(?::\d+)?"
            r"(?:/[^\s]*)?"
            r"$"
        )

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Return True when *data* passes the base checks and is a valid URL.

        On a pattern mismatch an ``InvalidURLException`` is handed to
        ``_set_custom_error`` and False is returned.
        """
        # NOTE(review): presumably the base validator guarantees that data is a
        # str at this point - confirm against StringValidator.
        if not super()._validate(tech_name, data):
            return False
        # fullmatch, not match: "$" also matches just before a trailing "\n",
        # so match() would accept "https://example.com\n".
        if not self._url_pattern.fullmatch(data):
            self._set_custom_error(InvalidURLException(f"Tech '{tech_name}' has invalid URL: '{data}'"))
            return False
        return True
217+
218+
188219
class BoolValidator(AbstractValidator):
189220
def get_type(self) -> list[Type]:
190221
return [bool]
@@ -317,6 +348,22 @@ def _validate(self, tech_name: str, data: Any) -> bool:
317348
return True
318349

319350

351+
class ReferenceValidator(ArrayValidator):
    """Array validator whose entries must name technologies that exist.

    Each entry is compared against the set of known technology names after
    everything from the first ``\\;`` onwards has been dropped.
    """

    def __init__(self, all_techs: set[str]):
        super().__init__()
        self._all_techs: Final[set[str]] = all_techs

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Return True when the base checks pass and every reference is known.

        The first unknown reference is reported through ``_set_custom_error``
        as a ``TechNotFoundException`` and ends the check with False.
        """
        if not super()._validate(tech_name, data):
            return False
        for entry in data:
            # Keep only the part before the first "\;" separator.
            clean_ref, _, _ = entry.partition("\\;")
            if clean_ref in self._all_techs:
                continue
            self._set_custom_error(TechNotFoundException(f"Tech '{tech_name}' references '{clean_ref}' but it doesn't exist!"))
            return False
        return True
365+
366+
320367
class TechnologiesValidator:
321368
def __init__(self, file_name: str):
322369
self._SOURCE_DIR: Final[str] = "src"
@@ -328,18 +375,19 @@ def __init__(self, file_name: str):
328375
self._IMAGES_DIR: Final[str] = "images"
329376
self._ICONS_DIR: Final[str] = "icons"
330377
self._ICONS: Final[list[str]] = [icon.name for icon in pathlib.Path(self._SOURCE_DIR).joinpath(self._IMAGES_DIR).joinpath(self._ICONS_DIR).iterdir()]
378+
self._ALL_TECHS: Final[set[str]] = self._get_all_tech_names()
331379
self._validators: dict[str, AbstractValidator] = { # TODO confidence and version validator
332380
"cats": CategoryValidator(self._CATEGORIES, True),
333-
"website": StringValidator(True),
381+
"website": URLValidator(True),
334382
"description": StringValidator(),
335383
"icon": IconValidator(self._ICONS),
336384
"cpe": CPEValidator(),
337385
"saas": BoolValidator(),
338386
"oss": BoolValidator(),
339387
"pricing": PricingValidator(),
340-
"implies": ArrayValidator(), # TODO cat validation
341-
"requires": ArrayValidator(), # TODO ^
342-
"excludes": ArrayValidator(), # TODO ^
388+
"implies": ReferenceValidator(self._ALL_TECHS),
389+
"requires": ReferenceValidator(self._ALL_TECHS),
390+
"excludes": ReferenceValidator(self._ALL_TECHS),
343391
"requiresCategory": CategoryValidator(self._CATEGORIES),
344392
"cookies": DictValidator(contains_regex=True),
345393
"dom": DomValidator(),
@@ -385,6 +433,16 @@ def _duplicate_key_validator(cls, pairs: list[tuple[str, Any]]) -> dict[str, Any
385433
result[key] = value
386434
return result
387435

436+
def _get_all_tech_names(self) -> set[str]:
437+
all_techs: set[str] = set()
438+
for letter in list(string.ascii_lowercase) + ["_"]:
439+
tech_file: pathlib.Path = self._FULL_TECH_DIR.joinpath(f"{letter}.json")
440+
if tech_file.exists():
441+
with tech_file.open("r", encoding="utf8") as f:
442+
technologies: dict = json.load(f)
443+
all_techs.update(technologies.keys())
444+
return all_techs
445+
388446

389447
class TechnologyProcessor:
390448
def __init__(self, tech_name: str, tech_data: dict, validators: dict[str, AbstractValidator]):

.github/workflows/validate.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,27 @@ jobs:
1818
- name: run structure validator
1919
run: python3 .github/workflows/scripts/structure_validator.py
2020

21+
validate_schema:
22+
runs-on: ubuntu-22.04
23+
needs: validate_structure
24+
strategy:
25+
matrix:
26+
python-version: [ "3.12" ]
27+
steps:
28+
- name: checkout repository
29+
uses: actions/checkout@v4
30+
31+
- name: set up Python ${{ matrix.python-version }}
32+
uses: actions/setup-python@v5
33+
with:
34+
python-version: ${{ matrix.python-version }}
35+
36+
- name: install dependencies
37+
run: python3 -m pip install jsonschema
38+
39+
- name: run schema validator
40+
run: python3 .github/workflows/scripts/schema_validator.py
41+
2142
validate_categories:
2243
runs-on: ubuntu-22.04
2344
needs: validate_structure

src/technologies/d.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,7 @@
22662266
"cats": [
22672267
1
22682268
],
2269+
"cpe": "cpe:2.3:a:monospace:directus:*:*:*:*:*:node.js:*:*",
22692270
"description": "Directus is a free and open-source headless CMS framework for managing custom SQL-based databases.",
22702271
"headers": {
22712272
"x-powered-by": "^Directus$"

src/technologies/g.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -479,6 +479,7 @@
479479
57,
480480
12
481481
],
482+
"cpe": "cpe:2.3:a:gatsbyjs:gatsby:*:*:*:*:*:node.js:*:*",
482483
"description": "Gatsby is a React-based open-source framework with performance, scalability and security built-in.",
483484
"dom": [
484485
"div#___gatsby, style#gatsby-inlined-css"
@@ -1409,6 +1410,7 @@
14091410
1,
14101411
11
14111412
],
1413+
"cpe": "cpe:2.3:a:ghost:ghost:*:*:*:*:*:node.js:*:*",
14121414
"description": "Ghost is a powerful app for new-media creators to publish, share, and grow a business around their content.",
14131415
"headers": {
14141416
"X-Ghost-Cache-Status": ""

src/technologies/j.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,7 @@
674674
"cats": [
675675
22
676676
],
677+
"cpe": "cpe:2.3:a:eclipse:jetty:*:*:*:*:*:*:*:*",
677678
"description": "Jetty is an open-source web server and servlet container known for its scalability and efficiency, supporting protocols like HTTP and WebSocket for various applications from development tools to cloud services.",
678679
"headers": {
679680
"Server": "Jetty(?:\\(([\\d\\.]*\\d+))?\\;version:\\1"

src/technologies/k.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2003,6 +2003,7 @@
20032003
18,
20042004
22
20052005
],
2006+
"cpe": "cpe:2.3:a:koajs:koa:*:*:*:*:*:node.js:*:*",
20062007
"headers": {
20072008
"X-Powered-By": "^koa$"
20082009
},
@@ -2274,6 +2275,7 @@
22742275
"cats": [
22752276
64
22762277
],
2278+
"cpe": "cpe:2.3:a:konghq:kong:*:*:*:*:*:*:*:*",
22772279
"description": "Kong is an open-source API gateway and platform that acts as middleware between compute clients and the API-centric applications.",
22782280
"headers": {
22792281
"via": "^kong/([\\d\\.]+)(?:.+)?$\\;version:\\1"

src/technologies/m.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6701,7 +6701,7 @@
67016701
"meta": {
67026702
"generator": "Mijnwebwinkel"
67036703
},
6704-
"website": "https://www.myonlinestore.com/ "
6704+
"website": "https://www.myonlinestore.com/"
67056705
},
67066706
"MyRest": {
67076707
"cats": [

src/technologies/n.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2536,6 +2536,7 @@
25362536
18,
25372537
57
25382538
],
2539+
"cpe": "cpe:2.3:a:nuxt:nuxt:*:*:*:*:*:*:*:*",
25392540
"description": "Nuxt is a Vue framework for developing modern web applications.",
25402541
"dom": [
25412542
"div[id^='__nuxt']"

0 commit comments

Comments
 (0)