Skip to content

Commit 3560b60

Browse files
author
hornik
committed
Check formal validity of language tags in DESCRIPTION Language field (PR#18818).
git-svn-id: https://svn.r-project.org/R/trunk@87349 00db46b3-68df-0310-9c12-caf00c1e9a41
1 parent 2656741 commit 3560b60

File tree

2 files changed

+60
-0
lines changed

2 files changed

+60
-0
lines changed

src/library/tools/R/check.R

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,6 +1194,19 @@ add_dummies <- function(dir, Log)
11941194
}
11951195
}
11961196

1197+
if(!is.na(lang <- db["Language"])) {
1198+
s <- unlist(strsplit(lang, ", *"), use.names = FALSE)
1199+
s <- s[!grepl(re_anchor(.make_RFC4646_langtag_regexp()), s)]
1200+
if(length(s)) {
1201+
if(!any) noteLog(Log)
1202+
any <- TRUE
1203+
printLog(Log,
1204+
paste(c("Language field contains the following invalid language tags:",
1205+
paste0(" ", s)),
1206+
collapse = "\n"),
1207+
"\n")
1208+
}
1209+
}
11971210

11981211
out <- format(.check_package_description2(dfile))
11991212
if (length(out)) {

src/library/tools/R/utils.R

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,6 +1842,53 @@ function(parent = parent.frame())
18421842
}
18431843
})
18441844

1845+
### ** .make_RFC4646_langtag_regexp
1846+
1847+
.make_RFC4646_langtag_regexp <-
function()
{
    ## Build a regular expression (as a character string) matching the
    ## 'langtag' form of RFC 4646 language tags.
    ## The result is not anchored: callers testing whether a whole
    ## string is a formally valid tag need to add "^" and "$" (e.g.,
    ## via re_anchor()).
    ##
    ## See <https://www.ietf.org/rfc/rfc4646.html>.
    ## Language tags can be of the form (in ABNF, see
    ## <https://tools.ietf.org/rfc/rfc4234.txt>):
    ##   langtag / privateuse / grandfathered
    ## where
    ##   privateuse    = ("x"/"X") 1*("-" (1*8alphanum))
    ##   grandfathered = 1*3ALPHA 1*2("-" (2*8alphanum))
    ## We only allow langtag, for which in turn we have
    ##   (language
    ##    ["-" script]
    ##    ["-" region]
    ##    *("-" variant)
    ##    *("-" extension)
    ##    ["-" privateuse])
    ## where
    ##   language      = (2*3ALPHA [ extlang ]) ; shortest ISO 639 code
    ##                   / 4ALPHA               ; reserved for future use
    ##                   / 5*8ALPHA             ; registered language subtag
    ##   extlang       = *3("-" 3ALPHA)         ; reserved for future use
    ##   script        = 4ALPHA                 ; ISO 15924 code
    ##   region        = 2ALPHA                 ; ISO 3166 code
    ##                   / 3DIGIT               ; UN M.49 code
    ##   variant       = 5*8alphanum            ; registered variants
    ##                   / (DIGIT 3alphanum)
    ##   extension     = singleton 1*("-" (2*8alphanum))
    ##   singleton     = %x41-57 / %x59-5A / %x61-77 / %x79-7A / DIGIT
    ##                   ; "a"-"w" / "y"-"z" / "A"-"W" / "Y"-"Z" / "0"-"9"
    ##   alphanum      = (ALPHA / DIGIT)        ; letters and numbers
    ##
    ## NOTE: the optional trailing privateuse part of langtag is not
    ## included in the regexp built below, so tags ending in "-x-..."
    ## do not match.

    re_extlang <- "[[:alpha:]]{3}"
    re_language <-
        sprintf("[[:alpha:]]{2,3}(-%s){0,3}|[[:alpha:]]{4,8}", re_extlang)
    re_script <- "[[:alpha:]]{4}"
    re_region <- "[[:alpha:]]{2}|[[:digit:]]{3}"
    re_variant <- "[[:alnum:]]{5,8}|[[:digit:]][[:alnum:]]{3}"
    ## All singletons except "x"/"X" (reserved for private use), spelled
    ## out explicitly rather than as ranges (presumably to avoid
    ## locale-dependent range semantics).
    re_singleton <- "[abcdefghijklmnopqrstuvwyzABCDEFGHIJKLMNOPQRSTUVWYZ0123456789]"
    re_extension <- sprintf("(%s)(-[[:alnum:]]{2,8}){1,}", re_singleton)

    ## Several of the pieces above contain a top-level alternation.
    ## Each piece must hence be wrapped in its own group before the "-"
    ## separator is prepended: otherwise, e.g., "-" followed by
    ## "AA|999" would read as "-AA" or "999", rejecting "es-419" and
    ## accepting "es419".
    sprintf("(%s)((-(%s))?)((-(%s))?)((-(%s))*)((-(%s))*)",
            re_language, re_script, re_region, re_variant, re_extension)
}
1891+
18451892
### ** nonS3methods [was .make_S3_methods_stop_list ]
18461893

18471894
nonS3methods <- function(package)

0 commit comments

Comments
 (0)