
Commit 0981311

Merge branch 'RDFLib:master' into remove-film-example
2 parents e4b7aca + 24d6070 commit 0981311


1,875 files changed: +623891 -4 lines changed


test/data/README.md

Lines changed: 23 additions & 3 deletions
@@ -1,9 +1,29 @@
-# Consistent Test Data
+# Test Data
 
-This directory contains consistent graphs that can be used inside tests, the
-graphs in this directory should not change.
+This directory contains data for use inside tests. Ideally the data in this
+directory should be constant and should not change. In general, non-original
+data that is widely known is preferred to original data, as well-known data
+has well-known attributes and qualities that make it easier to reason about.
 
 
 ## File origins
 
 - `rdfs.ttl`: `http://www.w3.org/2000/01/rdf-schema#`
+
+## Fetcher
+
+Files that originate from the internet should be downloaded using `fetcher.py`
+so we can easily verify the integrity of the files by re-running `fetcher.py`.
+
+```bash
+# run in repo root
+
+# fetch everything
+.venv/bin/python3 test/data/fetcher.py
+
+# only fetch a single file
+.venv/bin/python3 test/data/fetcher.py test/data/rdfs.ttl
+
+# only fetch files below a path
+.venv/bin/python3 test/data/fetcher.py test/data/suites
+```
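
A new remote file would be made fetchable this way by adding an entry to the `RESOURCES` list in `fetcher.py` (shown in full below). A minimal sketch of such an entry, mirroring the existing `rdfs.ttl` entry; the URL and local filename here are hypothetical placeholders, not actual project resources:

```python
# Hypothetical entry for the RESOURCES list in test/data/fetcher.py.
# FileResource, Request and DATA_PATH are already defined/imported in that file;
# the URL and destination filename below are placeholders for illustration only.
FileResource(
    remote=Request(
        "https://example.org/example.ttl",  # placeholder source URL
        headers={"Accept": "text/turtle"},  # ask the server for Turtle
    ),
    local_path=(DATA_PATH / "example.ttl"),  # placeholder path under test/data
),
```

Re-running `fetcher.py`, optionally restricted to the new path as in the commands above, would then download the file to its `local_path`.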

test/data/fetcher.py

Lines changed: 309 additions & 0 deletions
@@ -0,0 +1,309 @@
#!/usr/bin/env python
import argparse
import enum
import logging
import os
import random
import re
import shutil
import string
import sys
import tarfile
from contextlib import ExitStack, contextmanager
from dataclasses import dataclass, field
from pathlib import Path
from tarfile import TarFile, TarInfo
from tempfile import TemporaryDirectory, mkdtemp
from typing import IO, Generator, List, Pattern, Union
from urllib.request import Request, urlopen
from zipfile import ZipFile, ZipInfo

DATA_PATH = Path(__file__).parent


@dataclass
class Resource:
    remote: Union[str, Request]
    local_path: Path

    def fetch(self, tmp_path: Path) -> None:
        raise NotImplementedError()


@dataclass
class FileResource(Resource):
    def fetch(self, tmp_path: Path) -> None:
        if self.local_path.exists():
            logging.debug("removing %s", self.local_path)
            os.remove(self.local_path)

        with ExitStack() as xstack:
            request = (
                self.remote
                if isinstance(self.remote, Request)
                else Request(self.remote)
            )
            response = urlopen(request)
            remote_io: IO[bytes] = xstack.enter_context(response)

            local_io = xstack.enter_context(self.local_path.open("wb+"))
            shutil.copyfileobj(remote_io, local_io)

        logging.info("Downloaded %s to %s", request.full_url, self.local_path)


class ArchiveType(enum.Enum):
    ZIP = "zip"
    TAR_GZ = "tar.gz"


@dataclass
class ArchiveResource(Resource):
    type: ArchiveType
    pattern: Pattern[str]

    def fetch(self, tmp_path: Path) -> None:
        if self.local_path.exists():
            logging.debug("removing %s", self.local_path)
            shutil.rmtree(self.local_path)
        with ExitStack() as xstack:
            request = (
                self.remote
                if isinstance(self.remote, Request)
                else Request(self.remote)
            )
            response = urlopen(request)
            remote_io: IO[bytes] = xstack.enter_context(response)
            name = (
                "".join(
                    random.choices(
                        string.ascii_uppercase + string.digits + string.ascii_lowercase,
                        k=10,
                    )
                )
                + f".{self.type.value}"
            )
            tmp_file = tmp_path / name
            logging.info("fetching %s to temp file %s", self.remote, tmp_file)
            with tmp_file.open("wb+") as tmp_io:
                shutil.copyfileobj(remote_io, tmp_io)

            archive_file: Union[ZipFile, TarFile]
            if self.type is ArchiveType.ZIP:
                archive_file = xstack.enter_context(ZipFile(tmp_file))
            elif self.type is ArchiveType.TAR_GZ:
                archive_file = xstack.enter_context(tarfile.open(tmp_file, mode="r:gz"))
                # archive_file = xstack.enter_context(TarFile(tmp_file, mode="r|gz"))
            else:
                raise ValueError(f"invalid type {self.type}")

            for member_info in self._member_list(archive_file):
                member_filename = self._member_filename(member_info)
                if self._member_isdir(member_info):
                    logging.debug("Ignoring directory %s", member_filename)
                    continue

                match = self.pattern.match(member_filename)
                if match is None:
                    logging.debug("Ignoring unmatched %s", member_filename)
                    continue
                groups = match.groups()
                if len(groups) > 0:
                    dest_filename = groups[0]
                else:
                    # patterns without a capture group keep the member name as-is
                    dest_filename = member_filename

                member_io: IO[bytes]
                with self._member_io(archive_file, member_info) as member_io:
                    local_file = self.local_path / dest_filename
                    if not local_file.parent.exists():
                        local_file.parent.mkdir(parents=True)
                    logging.debug("writing %s to %s", member_filename, local_file)
                    local_file.write_bytes(member_io.read())

        logging.info(
            "Downloaded %s and extracted files matching %s to %s",
            request.full_url,
            self.pattern,
            self.local_path,
        )

    @classmethod
    def _member_list(
        cls, archive: Union[ZipFile, TarFile]
    ) -> Union[List[ZipInfo], List[TarInfo]]:
        if isinstance(archive, ZipFile):
            return archive.infolist()
        return archive.getmembers()

    @classmethod
    def _member_isdir(cls, member_info: Union[ZipInfo, TarInfo]) -> bool:
        if isinstance(member_info, ZipInfo):
            return member_info.is_dir()
        return member_info.isdir()

    @classmethod
    def _member_filename(cls, member_info: Union[ZipInfo, TarInfo]) -> str:
        if isinstance(member_info, ZipInfo):
            return member_info.filename
        return member_info.name

    @classmethod
    @contextmanager
    def _member_io(
        cls, archive: Union[ZipFile, TarFile], member_info: Union[ZipInfo, TarInfo]
    ) -> Generator[IO[bytes], None, None]:
        if isinstance(archive, ZipFile):
            assert isinstance(member_info, ZipInfo)
            with archive.open(member_info) as member_io:
                yield member_io
        else:
            assert isinstance(member_info, TarInfo)
            opt_io = archive.extractfile(member_info)
            assert opt_io is not None
            yield opt_io


RESOURCES: List[Resource] = [
    ArchiveResource(
        remote="https://github.com/w3c/N3/archive/c44d123c5958ca04117e28ca3769e2c0820f72e6.zip",
        local_path=(DATA_PATH / "suites" / "w3c" / "n3"),
        type=ArchiveType.ZIP,
        pattern=re.compile(r"^[^\/]+[\/]tests[\/](.+)$"),
    ),
    ArchiveResource(
        remote="https://www.w3.org/2013/TurtleTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "turtle"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^[^\/]+[\/](.+)$"),
    ),
    ArchiveResource(
        remote="https://www.w3.org/2013/N-QuadsTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "nquads"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    ArchiveResource(
        remote="https://www.w3.org/2013/N-TriplesTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "ntriples"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    ArchiveResource(
        remote="https://www.w3.org/2013/TrigTests/TESTS.tar.gz",
        local_path=(DATA_PATH / "suites" / "w3c" / "trig"),
        type=ArchiveType.TAR_GZ,
        pattern=re.compile(r"^(.+)$"),
    ),
    # NOTE: Commented out as these files contain local modifications.
    # ArchiveResource(
    #     remote="https://www.w3.org/2013/RDFXMLTests/TESTS.zip",
    #     local_path=(DATA_PATH / "suites" / "w3c" / "rdfxml"),
    #     type=ArchiveType.ZIP,
    #     pattern=re.compile(r"^(.+)$"),
    # ),
    # NOTE: Commented out as this contains local modifications.
    # ArchiveResource(
    #     remote="https://www.w3.org/2009/sparql/docs/tests/sparql11-test-suite-20121023.tar.gz",
    #     local_path=(DATA_PATH / "suites" / "DAWG" / "data-sparql11"),
    #     type=ArchiveType.TAR_GZ,
    #     pattern=re.compile(r"^[^\/]+[\/](.+)$"),
    # ),
    FileResource(
        remote=Request(
            "http://www.w3.org/2000/01/rdf-schema#", headers={"Accept": "text/turtle"}
        ),
        local_path=(DATA_PATH / "rdfs.ttl"),
    ),
]


@dataclass
class Application:
    parser: argparse.ArgumentParser = field(
        default_factory=lambda: argparse.ArgumentParser(add_help=True)
    )

    def __post_init__(self) -> None:
        parser = self.parser
        parser.add_argument(
            "-v",
            "--verbose",
            action="count",
            dest="verbosity",
            help="increase verbosity level",
        )
        parser.add_argument(
            "--keep-tmp",
            action="store_true",
            default=False,
        )
        parser.add_argument("paths", nargs="*", type=str)
        parser.set_defaults(handler=self.handle)

    def run(self, args: List[str]) -> None:
        parse_result = self.parser.parse_args(args)

        verbosity = parse_result.verbosity
        if verbosity is not None:
            root_logger = logging.getLogger("")
            root_logger.propagate = True
            new_level = (
                root_logger.getEffectiveLevel()
                - (min(1, verbosity)) * 10
                - min(max(0, verbosity - 1), 9) * 1
            )
            root_logger.setLevel(new_level)

        logging.debug(
            "args = %s, parse_result = %s, logging.level = %s",
            args,
            parse_result,
            logging.getLogger("").getEffectiveLevel(),
        )

        parse_result.handler(parse_result)

    def handle(self, parse_result: argparse.Namespace) -> None:
        logging.debug("entry ...")

        paths = {Path(path).absolute() for path in parse_result.paths}

        logging.debug("paths = %s", paths)

        if parse_result.keep_tmp:
            tmp_path = Path(mkdtemp())
        else:
            tmp_dir = TemporaryDirectory()
            tmp_path = Path(tmp_dir.name)

        for resource in RESOURCES:
            if paths:
                include = False
                for path in paths:
                    try:
                        resource.local_path.absolute().relative_to(path)
                        include = True
                    except ValueError:
                        # not relative to, ignoring
                        pass
                if not include:
                    logging.info("skipping %s", resource.local_path)
                    continue
            resource.fetch(tmp_path)


def main() -> None:
    logging.basicConfig(
        level=os.environ.get("PYLOGGING_LEVEL", logging.INFO),
        stream=sys.stderr,
        datefmt="%Y-%m-%dT%H:%M:%S",
        format=(
            "%(asctime)s.%(msecs)03d %(process)d %(thread)d %(levelno)03d:%(levelname)-8s "
            "%(name)-12s %(module)s:%(lineno)s:%(funcName)s %(message)s"
        ),
    )

    Application().run(sys.argv[1:])


if __name__ == "__main__":
    main()
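
The destination of each extracted archive member is decided by `ArchiveResource.pattern`: members whose names do not match the pattern are skipped, and for matching members the first capture group becomes the path under `local_path`. A small standalone sketch of that mapping, reusing the pattern from the N3 suite entry above; the member names are invented examples, not actual archive contents:

```python
# Standalone illustration of ArchiveResource's pattern-based extraction:
# unmatched archive members are ignored, and the first capture group of the
# pattern becomes the destination path under local_path.
# The member names below are invented for illustration.
import re

pattern = re.compile(r"^[^\/]+[\/]tests[\/](.+)$")  # pattern from the N3 suite entry

for member_name in [
    "N3-c44d123/tests/N3Tests/manifest.ttl",  # matches: top dir and "tests/" stripped
    "N3-c44d123/README.md",  # no "tests/" segment, so it is ignored
]:
    match = pattern.match(member_name)
    if match is None:
        print(f"ignore  {member_name}")
    else:
        print(f"extract {member_name} -> {match.groups()[0]}")
```

Patterns like `^(.+)$` (used for the N-Quads, N-Triples and TriG suites) keep member paths unchanged, while `^[^\/]+[\/](.+)$` (the Turtle suite) strips only the archive's top-level directory.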

test/data/suites/w3c/n3/LICENSE.md

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
All documents in this Repository are licensed by contributors
under both the [W3C Test Suite License](http://www.w3.org/Consortium/Legal/2008/04-testsuite-license) and
[W3C Software and Document License](https://www.w3.org/Consortium/Legal/copyright-software).
