@@ -74,6 +74,16 @@ def __init__(self, msg: str):
7474 super ().__init__ (msg )
7575
7676
class TechNotFoundException(Exception):
    """Error describing a reference to a technology name that is not known."""

    def __init__(self, msg: str) -> None:
        # The message is stored as the sole positional argument of Exception.
        super().__init__(msg)
80+
81+
class InvalidURLException(Exception):
    """Error describing a value that is not an acceptable URL."""

    def __init__(self, msg: str) -> None:
        # The message is stored as the sole positional argument of Exception.
        super().__init__(msg)
85+
86+
7787class AbstractValidator :
7888 def __init__ (self , required : bool = False ):
7989 self ._required = required
@@ -185,6 +195,27 @@ def get_type(self) -> list[Type]:
185195 return [str ]
186196
187197
class URLValidator(StringValidator):
    """String validator that additionally requires an absolute http(s) URL.

    The accepted shape is ``http://`` or ``https://``, one or more dot-separated
    host labels, a final label of at least two characters, an optional
    ``:port`` and an optional path starting with ``/`` that contains no
    whitespace.
    """

    def __init__(self, required: bool = False):
        """Create the validator.

        Args:
            required: Forwarded unchanged to the parent validator.
        """
        super().__init__(required)
        self._url_pattern: Final[re.Pattern] = re.compile(
            r"^https?://"
            r"(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+"
            r"[A-Za-z0-9-]{2,}"
            r"(?::\d+)?"
            r"(?:/[^\s]*)?"
            r"$"
        )

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Run the parent checks, then check ``data`` against the URL pattern.

        Args:
            tech_name: Name of the technology being validated; used in the
                error message only.
            data: Value of the field under validation.

        Returns:
            True when the parent validation passes and ``data`` is a URL of
            the accepted shape. Otherwise False; for a URL mismatch an
            ``InvalidURLException`` is registered through
            ``_set_custom_error``.
        """
        if not super()._validate(tech_name, data):
            return False
        # fullmatch instead of match: with match(), the trailing `$` also
        # succeeds just before a final newline, so "http://a.com\n" would pass.
        if not self._url_pattern.fullmatch(data):
            self._set_custom_error(InvalidURLException(f"Tech '{tech_name}' has invalid URL: '{data}'"))
            return False
        return True
217+
218+
188219class BoolValidator (AbstractValidator ):
189220 def get_type (self ) -> list [Type ]:
190221 return [bool ]
@@ -317,6 +348,22 @@ def _validate(self, tech_name: str, data: Any) -> bool:
317348 return True
318349
319350
class ReferenceValidator(ArrayValidator):
    """Array validator whose entries must name technologies that exist.

    An entry may carry a suffix introduced by a backslash followed by a
    semicolon; only the text before the first such separator is looked up.
    """

    def __init__(self, all_techs: set[str]):
        """Create the validator.

        Args:
            all_techs: Set of known technology names that references are
                checked against. The set is stored, not copied.
        """
        super().__init__()
        self._all_techs: Final[set[str]] = all_techs

    def _validate(self, tech_name: str, data: Any) -> bool:
        """Run the parent checks, then check every reference in ``data``.

        Args:
            tech_name: Name of the technology being validated; used in the
                error message only.
            data: Value of the field under validation.

        Returns:
            True when the parent validation passes and every reference is a
            known technology. Otherwise False; for the first unknown
            reference a ``TechNotFoundException`` is registered through
            ``_set_custom_error``.
        """
        if not super()._validate(tech_name, data):
            return False
        # Iterating a bare string would check it one character at a time, so a
        # lone string is treated as a single reference.
        # NOTE(review): whether the parent validator ever lets a plain string
        # through is not visible from here - confirm.
        references = [data] if isinstance(data, str) else data
        for ref in references:
            # Drop anything after the first backslash-semicolon separator.
            clean_ref: str = ref.split(r"\;")[0]
            if clean_ref not in self._all_techs:
                self._set_custom_error(TechNotFoundException(f"Tech '{tech_name}' references '{clean_ref}' but it doesn't exist!"))
                return False
        return True
365+
366+
320367class TechnologiesValidator :
321368 def __init__ (self , file_name : str ):
322369 self ._SOURCE_DIR : Final [str ] = "src"
@@ -328,18 +375,19 @@ def __init__(self, file_name: str):
328375 self ._IMAGES_DIR : Final [str ] = "images"
329376 self ._ICONS_DIR : Final [str ] = "icons"
330377 self ._ICONS : Final [list [str ]] = [icon .name for icon in pathlib .Path (self ._SOURCE_DIR ).joinpath (self ._IMAGES_DIR ).joinpath (self ._ICONS_DIR ).iterdir ()]
378+ self ._ALL_TECHS : Final [set [str ]] = self ._get_all_tech_names ()
331379 self ._validators : dict [str , AbstractValidator ] = { # TODO confidence and version validator
332380 "cats" : CategoryValidator (self ._CATEGORIES , True ),
333- "website" : StringValidator (True ),
381+ "website" : URLValidator (True ),
334382 "description" : StringValidator (),
335383 "icon" : IconValidator (self ._ICONS ),
336384 "cpe" : CPEValidator (),
337385 "saas" : BoolValidator (),
338386 "oss" : BoolValidator (),
339387 "pricing" : PricingValidator (),
340- "implies" : ArrayValidator (), # TODO cat validation
341- "requires" : ArrayValidator (), # TODO ^
342- "excludes" : ArrayValidator (), # TODO ^
388+ "implies" : ReferenceValidator ( self . _ALL_TECHS ),
389+ "requires" : ReferenceValidator ( self . _ALL_TECHS ),
390+ "excludes" : ReferenceValidator ( self . _ALL_TECHS ),
343391 "requiresCategory" : CategoryValidator (self ._CATEGORIES ),
344392 "cookies" : DictValidator (contains_regex = True ),
345393 "dom" : DomValidator (),
@@ -385,6 +433,16 @@ def _duplicate_key_validator(cls, pairs: list[tuple[str, Any]]) -> dict[str, Any
385433 result [key ] = value
386434 return result
387435
436+ def _get_all_tech_names (self ) -> set [str ]:
437+ all_techs : set [str ] = set ()
438+ for letter in list (string .ascii_lowercase ) + ["_" ]:
439+ tech_file : pathlib .Path = self ._FULL_TECH_DIR .joinpath (f"{ letter } .json" )
440+ if tech_file .exists ():
441+ with tech_file .open ("r" , encoding = "utf8" ) as f :
442+ technologies : dict = json .load (f )
443+ all_techs .update (technologies .keys ())
444+ return all_techs
445+
388446
389447class TechnologyProcessor :
390448 def __init__ (self , tech_name : str , tech_data : dict , validators : dict [str , AbstractValidator ]):
0 commit comments