@@ -221,12 +221,103 @@ samm-c:LocaleConstraintShape
221221 sh:maxCount 1 ;
222222 sh:name " localeCode" ;
223223 sh:description " An IETF BCP 47 locale code for the language of the value of the constrained Property" ;
224- sh:js [
225- a sh:JSConstraint ;
226- sh:jsLibrary samm:jsValidations ;
227- sh:jsFunctionName " isValidBCP47LanguageTag" ;
228- ] ;
229- ] .
224+ sh:pattern " ^[a-zA-Z]{2,3}(-[a-zA-Z0-9]{2,8})*$" ;
225+ sh:flags " i" ;
226+ ] ;
227+ sh:property [
228+ sh:path samm-c:localeCode ;
229+ sh:message " Invalid grandfathered locale code." ;
230+ sh:sparql [
231+ sh:select """
232+ prefix samm-c: <urn:samm:org.eclipse.esmf.samm:characteristic:2.3.0#>
233+
234+ select $this ?value
235+ where {
236+ $this samm-c:localeCode ?value .
237+ bind( lcase( str( ?value ) ) as ?localeStr )
238+
239+ # In BCP 47 the one-letter primary subtag "i" exists only in the registered irregular grandfathered tags, so any other "i"/"i-..." value is reported (compared in lower case)
240+ filter ( ( ?localeStr = "i" || strstarts( ?localeStr, "i-" ) ) &&
241+ ?localeStr not in ( "i-ami", "i-bnn", "i-default", "i-enochian", "i-hak", "i-klingon", "i-lux", "i-mingo", "i-navajo", "i-pwn", "i-tao", "i-tay", "i-tsu" ) )
242+ }
243+ """
244+ ]
245+ ] ;
246+ sh:property [
247+ sh:path samm-c:localeCode ;
248+ sh:message " Invalid language in locale code." ;
249+ sh:sparql [
250+ sh:select """
251+ prefix samm-c: <urn:samm:org.eclipse.esmf.samm:characteristic:2.3.0#>
252+
253+ select $this ?value
254+ where {
255+ $this samm-c:localeCode ?value .
256+ bind(lcase(str(?value)) as ?localeStr)
257+
258+ # Primary language subtag: everything before the first hyphen, or the whole tag if there is none
259+ bind( if( contains( ?localeStr, "-" ),
260+ strbefore( ?localeStr, "-" ),
261+ ?localeStr ) as ?language )
262+
263+ # Only two-letter subtags are checked, against the ISO 639-1 list below; longer primary subtags (e.g. "haw", "fil") cannot be enumerated here and are accepted rather than wrongly rejected
264+ filter ( strlen( ?language ) = 2 && ?language not in ("aa", "ab", "ae", "af", "ak", "am", "an", "ar", "as", "av", "ay", "az", "ba", "be", "bg", "bi", "bm", "bn", "bo", "br",
265+ "bs", "ca", "ce", "ch", "co", "cr", "cs", "cu", "cv", "cy", "da", "de", "dv", "dz", "ee", "el", "en", "eo", "es", "et", "eu", "fa", "ff", "fi",
266+ "fj", "fo", "fr", "fy", "ga", "gd", "gl", "gn", "gu", "gv", "ha", "he", "hi", "ho", "hr", "ht", "hu", "hy", "hz", "ia", "id", "ie", "ig", "ii",
267+ "ik", "io", "is", "it", "iu", "ja", "jv", "ka", "kg", "ki", "kj", "kk", "kl", "km", "kn", "ko", "kr", "ks", "ku", "kv", "kw", "ky", "la", "lb",
268+ "lg", "li", "ln", "lo", "lt", "lu", "lv", "mg", "mh", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "na", "nb", "nd", "ne", "ng", "nl", "nn",
269+ "no", "nr", "nv", "ny", "oc", "oj", "om", "or", "os", "pa", "pi", "pl", "ps", "pt", "qu", "rm", "rn", "ro", "ru", "rw", "sa", "sc", "sd", "se",
270+ "sg", "si", "sk", "sl", "sm", "sn", "so", "sq", "sr", "ss", "st", "su", "sv", "sw", "ta", "te", "tg", "th", "ti", "tk", "tl", "tn", "to", "tr",
271+ "ts", "tt", "tw", "ty", "ug", "uk", "ur", "uz", "ve", "vi", "vo", "wa", "wo", "xh", "yi", "yo", "za", "zh", "zu") )
272+ # No exemption for grandfathered or private-use tags is needed: their primary subtag is either not two letters long ("i", "x", "sgn", "art", "cel") or already listed ("en", "no", "zh")
273+ }
274+ """
275+ ]
276+ ] ;
277+ sh:property [
278+ sh:path samm-c:localeCode ;
279+ sh:message " Invalid region in locale code." ;
280+ sh:sparql [
281+ sh:select """
282+ prefix samm-c: <urn:samm:org.eclipse.esmf.samm:characteristic:2.3.0#>
283+
284+ select $this ?value
285+ where {
286+ $this samm-c:localeCode ?value .
287+ bind( lcase( str( ?value ) ) as ?localeStr )
288+
289+ # A region is the two-letter or three-digit subtag that follows the language, up to three
290+ # extended-language subtags and an optional four-letter script (e.g. "cn" in "zh-hant-cn")
291+ bind( "^[a-z]{2,8}(-[a-z]{3}){0,3}(-[a-z]{4})?-([a-z]{2}|[0-9]{3})(-.*)?$" as ?regionPattern )
292+ # Group 3 of the pattern is the region; tags that do not match (no region, private use such as "x-ab") yield ""
293+ bind( if( regex( ?localeStr, ?regionPattern ),
294+ replace( ?localeStr, ?regionPattern, "$3" ),
295+ "" ) as ?potentialRegion )
296+
297+ # Report a region only when one is present and it is not in the allow-list below.
298+ # The list holds the VALID region subtags: ISO 3166-1 alpha-2 codes followed by
299+ # UN M.49 area codes, written in lower case because ?localeStr was lower-cased above.
300+ # Tags without a region subtag have an empty ?potentialRegion and are never reported.
301+ filter ( ?potentialRegion != "" &&
302+ # Known valid region codes
303+ ?potentialRegion not in ( "ad", "ae", "af", "ag", "ai", "al", "am", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", "az",
304+ "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bl", "bm", "bn", "bo", "bq", "br", "bs", "bt", "bv", "bw", "by", "bz",
305+ "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz",
306+ "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "fi", "fj", "fk", "fm", "fo", "fr",
307+ "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy",
308+ "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp",
309+ "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly",
310+ "ma", "mc", "md", "me", "mf", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz",
311+ "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py",
312+ "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", "sr", "ss", "st", "sv", "sx", "sy", "sz",
313+ "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "um", "us", "uy", "uz",
314+ "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw",
315+ "001", "002", "003", "005", "009", "011", "013", "014", "015", "017", "018", "019", "021", "029", "030", "034", "035", "039",
316+ "053", "054", "057", "061", "142", "143", "145", "150", "151", "154", "155", "202", "419" ) )
317+ }
318+ """
319+ ]
320+ ] .
230321
231322samm-c:EncodingConstraintShape
232323 a sh:NodeShape ;
@@ -315,11 +406,25 @@ samm-c:RegularExpressionConstraintShape
315406 sh:minCount 1 ;
316407 sh:name " value" ;
317408 sh:description " Constrains the lexical value of a property." ;
318- sh:js [
319- a sh:JSConstraint ;
409+ sh:sparql [
410+ a sh:SPARQLConstraint ;
320411 sh:message " The RegularExpressionConstraint's value is no valid regular expression." ;
321- sh:jsLibrary samm:jsValidations ;
322- sh:jsFunctionName " isValidRegularExpression" ;
412+ sh:prefixes samm:prefixDeclarations ;
413+ sh:select """
414+ select $this ?value ?code ?highlight
415+ where {
416+ $this samm:value ?value .
417+ # Let the SPARQL engine's own regex parser decide whether the pattern is valid:
418+ # regex() raises an evaluation error for a malformed pattern, and an error
419+ # inside bind() leaves the target variable unbound instead of failing the query.
420+ bind( regex( "", str( ?value ) ) as ?compiles )
421+ # Report exactly those values whose pattern could not be compiled; this covers
422+ # the unmatched "(" case as well as every other syntax error.
423+ filter( !bound( ?compiles ) )
424+ bind( 'ERR_INVALID_REGEX' as ?code )
425+ bind( ?value as ?highlight )
426+ }
427+ """
323428 ] ;
324429 ] ;
325430 sh:sparql [
@@ -515,7 +620,7 @@ samm-c:EnumerationShape
515620 $this samm:dataType ?definedDataType .
516621 ?values rdf:rest*/rdf:first ?item .
517622 optional { ?item a samm:Value ; samm:value ?innerValue . }
518- bind( coalesce( datatype(?innerValue), datatype(?item) ) AS ?literalDataType ) .
623+ bind( coalesce( datatype(?innerValue), datatype(?item) ) as ?literalDataType ) .
519624 filter( strlen(str(?literalDataType)) > 0 ) .
520625 filter( ?literalDataType != ?definedDataType ) .
521626 bind( 'ERR_WRONG_DATATYPE' as ?code )
@@ -548,7 +653,7 @@ samm-c:EnumerationShape
548653 select distinct $this ?value ?count
549654 where {
550655 {
551- select $this ?value (count(?value) AS ?count)
656+ select $this ?value (count(?value) as ?count)
552657 where {
553658 $this samm-c:values ?list .
554659 ?list rdf:rest*/rdf:first/samm:value? ?value .
@@ -644,11 +749,25 @@ samm-c:StructuredValueShape
644749 sh:minCount 1 ;
645750 sh:name " deconstructionRule" ;
646751 sh:description " A regular expression that deconstructs a string into groups." ;
647- sh:js [
648- a sh:JSConstraint ;
752+ sh:sparql [
753+ a sh:SPARQLConstraint ;
649754 sh:message " The StructuredValue's deconstructionRule is no valid regular expression." ;
650- sh:jsLibrary samm:jsValidations ;
651- sh:jsFunctionName " isValidRegularExpression" ;
755+ sh:prefixes samm:prefixDeclarations ;
756+ sh:select """
757+ select $this ?value ?code ?highlight
758+ where {
759+ $this samm-c:deconstructionRule ?value .
760+ # Let the SPARQL engine's own regex parser decide whether the rule is valid:
761+ # regex() raises an evaluation error for a malformed pattern, and an error
762+ # inside bind() leaves the target variable unbound instead of failing the query.
763+ bind( regex( "", str( ?value ) ) as ?compiles )
764+ # Report exactly those rules whose pattern could not be compiled; this covers
765+ # the unmatched "(" case as well as every other syntax error.
766+ filter( !bound( ?compiles ) )
767+ bind( 'ERR_INVALID_REGEX' as ?code )
768+ bind( ?value as ?highlight )
769+ }
770+ """
652771 ] ;
653772 ] ;
654773 sh:property [
0 commit comments