diff --git a/setup.py b/setup.py index c871a265..ed295e0c 100644 --- a/setup.py +++ b/setup.py @@ -80,7 +80,8 @@ def read(*names, **kwargs): 'aspect': ['aspectlib'], 'histogram': ['pygal', 'pygaljs'], ':python_version < "3.4"': ['statistics', 'pathlib2'], - 'elasticsearch': ['elasticsearch'] + 'elasticsearch': ['elasticsearch'], + 's3': ['boto3'], }, entry_points={ 'pytest11': [ diff --git a/src/pytest_benchmark/storage/s3.py b/src/pytest_benchmark/storage/s3.py new file mode 100644 index 00000000..b90690de --- /dev/null +++ b/src/pytest_benchmark/storage/s3.py @@ -0,0 +1,158 @@ +from __future__ import absolute_import + +import json +import os +import re +import sys + +from ..compat import reraise +from ..stats import normalize_stats +from ..utils import Path +from ..utils import commonpath +from ..utils import safe_dumps +from ..utils import short_filename +from ..utils import urlparse + +try: + from boto3.session import Session as boto3_session + from botocore.exceptions import ClientError +except ImportError as exc: + reraise(ImportError, ImportError("Please install boto3 or pytest-benchmark[s3]", exc.args), + sys.exc_info()[2]) + + +class S3Storage(object): + def __init__(self, path, logger, client=None, default_machine_id=None): + self.store = urlparse(path) + self.bucket = self.store.netloc + self.path = Path(self.store.path.strip("/")) + self.default_machine_id = default_machine_id + if not client: + session = boto3_session() + client = session.client("s3") + self.client = client + self.logger = logger + self._cache = {} + + def __str__(self): + return self.store.geturl() + + @property + def location(self): + return self.store.geturl() + + def get(self, name): + path = self.path.joinpath(self.default_machine_id) if self.default_machine_id else self.path + return path.joinpath(name) + + @property + def _next_num(self): + files = self.query("[0-9][0-9][0-9][0-9]_.+") + files.sort(reverse=True) + if not files: + return "0001" + for f in files: + try: + return 
"%04i" % (int(str(f.name).split('_')[0]) + 1) + except ValueError: + raise + + def exists(self, bucket, key): + try: + return self.client.head_object(Bucket=bucket, Key=key) + except ClientError: + return False + + def load_from_s3(self, key): + response = self.client.get_object(Bucket=self.bucket, Key=key) + return response["Body"].read().decode() + + def _create_s3_url(self, key): + return "s3://%s/%s" % (self.bucket, key) + + def save(self, output_json, save): + output_file = str(self.get("%s_%s.json" % (self._next_num, save))) + assert not self.exists(self.bucket, output_file) + self.client.put_object( + Bucket=self.bucket, + Key=output_file, + Body=safe_dumps(output_json, ensure_ascii=True, indent=4).encode(), + ) + self.logger.info("Saved benchmark data in: %s" % self._create_s3_url(output_file)) + + def query(self, *globs_or_files): + files = [] + globs = [] + if not globs_or_files: + globs_or_files = r".+", + + for globish in globs_or_files: + candidate = urlparse(globish) + if candidate.scheme == "s3": + if self.exists(candidate.netloc, candidate.path): + files.append(candidate.geturl()) + continue + + parts = candidate.path.split("/") + if len(parts) > 2: + raise ValueError("{0!r} isn't an existing file or acceptable glob. 
" + "Expected 'platform-glob/filename-glob' or 'filename-glob'.".format(globish)) + elif len(parts) == 2: + platform_glob, filename_glob = parts + else: + platform_glob = self.default_machine_id or r".+" + filename_glob, = parts or [''] + + filename_glob = filename_glob.rstrip(".+") + r".+\.json" + globs.append((platform_glob, filename_glob)) + + def _list_files(filter): + paginator = self.client.get_paginator('list_objects_v2') + pages = paginator.paginate(Bucket=self.bucket, Prefix=str(self.path)) + for page in pages: + contents = page.get('Contents', []) + for content in contents: + if re.search(filter, content["Key"]): + yield Path(content["Key"]) + + for platform_glob, filename_glob in globs: + files.extend(_list_files(os.path.join(platform_glob, filename_glob))) + + return sorted(files, key=lambda file: (file.name, file.parent)) + + def load(self, *globs_or_files): + if not globs_or_files: + globs_or_files = '[0-9][0-9][0-9][0-9]_', + + for file in self.query(*globs_or_files): + if file in self._cache: + data = self._cache[file] + else: + try: + data = json.loads(self.load_from_s3(str(file))) + for bench in data["benchmarks"]: + normalize_stats(bench["stats"]) + except Exception as exc: + self.logger.warn("Failed to load {0}: {1}".format(file, exc)) + continue + self._cache[file] = data + try: + relpath = file.relative_to(self.path) + except ValueError: + relpath = file + yield relpath, data + + def load_benchmarks(self, *globs_or_files): + sources = [ + (short_filename(path), path, data) + for path, data in self.load(*globs_or_files) + ] + common = len(commonpath([src for src, _, _ in sources])) if sources else 0 + for source, path, data in sources: + source = source[common:].lstrip(r'\/') + + for bench in data["benchmarks"]: + bench.update(bench.pop("stats")) + bench['path'] = os.path.join(self.store.geturl(), str(path)) + bench['source'] = source + yield bench diff --git a/src/pytest_benchmark/utils.py b/src/pytest_benchmark/utils.py index 
1d11ce44..6fcc509f 100644 --- a/src/pytest_benchmark/utils.py +++ b/src/pytest_benchmark/utils.py @@ -466,6 +466,10 @@ def load_storage(storage, **kwargs): if storage.startswith("file://"): from .storage.file import FileStorage return FileStorage(storage[len("file://"):], **kwargs) + elif storage.startswith("s3://"): + from .storage.s3 import S3Storage + # TODO update benchmark_autosave + return S3Storage(storage, **kwargs) elif storage.startswith("elasticsearch+"): from .storage.elasticsearch import ElasticsearchStorage # TODO update benchmark_autosave @@ -473,7 +477,7 @@ netrc_file=netrc_file) return ElasticsearchStorage(*args, **kwargs) else: - raise argparse.ArgumentTypeError("Storage must be in form of file://path or " + raise argparse.ArgumentTypeError("Storage must be in form of file://path or s3://path or " "elasticsearch+http[s]://host1,host2/index/doctype") diff --git a/tests/test_s3_storage.py b/tests/test_s3_storage.py new file mode 100644 index 00000000..4334b8b6 --- /dev/null +++ b/tests/test_s3_storage.py @@ -0,0 +1,252 @@ +from __future__ import absolute_import + +import json +import logging +from io import BytesIO +from io import StringIO + +import py +import pytest +from freezegun import freeze_time + +from pytest_benchmark import plugin +from pytest_benchmark.plugin import BenchmarkSession +from pytest_benchmark.plugin import pytest_benchmark_compare_machine_info +from pytest_benchmark.plugin import pytest_benchmark_generate_json +from pytest_benchmark.plugin import pytest_benchmark_group_stats +from pytest_benchmark.storage.s3 import S3Storage + +try: + import unittest.mock as mock +except ImportError: + import mock + +logger = logging.getLogger(__name__) + +THIS = py.path.local(__file__) +BENCHFILE = THIS.dirpath('test_storage/0030_5b78858eb718649a31fb93d8dc96ca2cee41a4cd_20150815_030419_uncommitted-changes.json') + +SAVE_DATA = json.loads(BENCHFILE.read_text(encoding='utf8')) +SAVE_DATA["machine_info"] = 
{'foo': 'bar'} +SAVE_DATA["commit_info"] = {'foo': 'bar'} + + +class Namespace(object): + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + def __getitem__(self, item): + return self.__dict__[item] + + +class LooseFileLike(BytesIO): + def close(self): + value = self.getvalue() + super(LooseFileLike, self).close() + self.getvalue = lambda: value + + +class MockStorage(S3Storage): + def __init__(self, *args, **kwargs): + super(MockStorage, self).__init__(*args, **kwargs) + m = mock.Mock() + m.get_paginator.return_value.paginate.return_value = [] + m.head_object.return_value = False + m.put_object.return_value = True + self.client = m + + +class MockSession(BenchmarkSession): + def __init__(self): + self.verbose = False + self.histogram = True + self.benchmarks = [] + self.performance_regressions = [] + self.sort = u"min" + self.compare = '0001' + self.logger = logging.getLogger(__name__) + self.machine_id = "FoobarOS" + self.machine_info = {'foo': 'bar'} + self.save = self.autosave = self.json = False + self.options = { + 'min_rounds': 123, + 'min_time': 234, + 'max_time': 345, + } + self.compare_fail = [] + self.config = Namespace(hook=Namespace( + pytest_benchmark_group_stats=pytest_benchmark_group_stats, + pytest_benchmark_generate_machine_info=lambda **kwargs: {'foo': 'bar'}, + pytest_benchmark_update_machine_info=lambda **kwargs: None, + pytest_benchmark_compare_machine_info=pytest_benchmark_compare_machine_info, + pytest_benchmark_generate_json=pytest_benchmark_generate_json, + pytest_benchmark_update_json=lambda **kwargs: None, + pytest_benchmark_generate_commit_info=lambda **kwargs: {'foo': 'bar'}, + pytest_benchmark_update_commit_info=lambda **kwargs: None, + )) + self.storage = MockStorage("s3://mocked/dir", default_machine_id="FoobarOS", logger=self.logger) + self.group_by = 'group' + self.columns = ['min', 'max', 'mean', 'stddev', 'median', 'iqr', + 'outliers', 'rounds', 'iterations'] + self.benchmarks = [] + data = 
json.loads(BENCHFILE.read_text(encoding='utf8')) + self.benchmarks.extend( + Namespace( + as_dict=lambda include_data=False, stats=True, flat=False, _bench=bench: + dict(_bench, **_bench["stats"]) if flat else dict(_bench), + name=bench['name'], + fullname=bench['fullname'], + group=bench['group'], + options=bench['options'], + has_error=False, + params=None, + **bench['stats'] + ) + for bench in data['benchmarks'] + ) + + +try: + text_type = unicode +except NameError: + text_type = str + + +def force_text(text): + if isinstance(text, text_type): + return text + else: + return text.decode('utf-8') + + +def force_bytes(text): + if isinstance(text, text_type): + return text.encode('utf-8') + else: + return text + + +def make_logger(sess): + output = StringIO() + sess.logger = Namespace( + info=lambda text, **opts: output.write(force_text(text) + u'\n'), + error=lambda text: output.write(force_text(text) + u'\n'), + ) + sess.storage.logger = Namespace( + info=lambda text, **opts: output.write(force_text(text) + u'\n'), + error=lambda text: output.write(force_text(text) + u'\n'), + ) + return output + + +@pytest.fixture +def sess(): + return MockSession() + + +@pytest.fixture +def logger_output(sess): + return make_logger(sess) + + +@freeze_time("2015-08-15T00:04:18.687119") +def test_handle_saving(sess, tmpdir, monkeypatch): + monkeypatch.setattr(plugin, '__version__', '2.5.0') + sess.save = "commitId" + sess.autosave = True + sess.json = None + sess.save_data = False + sess.handle_saving() + sess.storage.client.head_object.assert_called_with( + Bucket='mocked', + Key='dir/FoobarOS/0001_commitId.json', + ) + + args = sess.storage.client.put_object.call_args[1] + assert args["Bucket"] == "mocked" + assert args["Key"] == "dir/FoobarOS/0001_commitId.json" + assert json.loads(args["Body"].decode()) == SAVE_DATA + + +def test_s3_list_files(): + """Check if storage return the right next_num value.""" + m = mock.Mock() + m.get_paginator.return_value.paginate.return_value = [ 
+ { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "Contents": [ + {"Key": "FoobarOS/0001_commitId.json"}, + {"Key": "FoobarOS/0002_commitId.json"}, + {"Key": "FoobarOS/0003_commitId.json"}, + {"Key": "CentOS/0001_commitId.json"} + ] + } + ] + m.head_object.return_value = False + storage = S3Storage( + "s3://my-bucket", + logging.getLogger(__name__), + default_machine_id="FoobarOS", + client=m + ) + assert str(storage) == "s3://my-bucket" + assert storage.location == "s3://my-bucket" + assert storage._next_num == "0004" + + +def test_s3_load_single(): + """Test when loading only one benchmark.""" + m = mock.Mock() + m.get_paginator.return_value.paginate.return_value = [ + { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "Contents": [ + {"Key": "FoobarOS/0001_commitId.json"}, + {"Key": "FoobarOS/0002_commitId.json"} + ] + } + ] + m.head_object.return_value = False + m.get_object.return_value = { + "Body": BytesIO(json.dumps(SAVE_DATA).encode()) + } + storage = S3Storage( + "s3://my-bucket", + logging.getLogger(__name__), + default_machine_id="FoobarOS", + client=m + ) + b = list(storage.load_benchmarks("0001")) + assert b[0]["path"] == "s3://my-bucket/FoobarOS/0001_commitId.json" + assert m.get_object.call_count == 1 + + +def test_s3_load_multi(): + """Test when loading multiple benchmark.""" + m = mock.Mock() + m.get_paginator.return_value.paginate.return_value = [ + { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "Contents": [ + {"Key": "FoobarOS/0001_commitId.json"}, + {"Key": "FoobarOS/0002_commitId.json"}, + {"Key": "FoobarOS/0003_commitId.json"} + ] + } + ] + m.head_object.return_value = False + m.get_object.side_effect = [ + {"Body": BytesIO(json.dumps(SAVE_DATA).encode())}, + {"Body": BytesIO(json.dumps(SAVE_DATA).encode())}, + {"Body": BytesIO(json.dumps(SAVE_DATA).encode())}, + ] + + storage = S3Storage( + "s3://my-bucket", + logging.getLogger(__name__), + default_machine_id="FoobarOS", + client=m + ) + b = list(storage.load_benchmarks()) + assert 
len(b) == 3 + assert m.get_object.call_count == 3 diff --git a/tox.ini b/tox.ini index e4728163..9cfc87f9 100644 --- a/tox.ini +++ b/tox.ini @@ -51,6 +51,7 @@ deps = freezegun==0.3.12 hunter elasticsearch==7.1.0 + boto3 commands = cover: {posargs:py.test --cov=src --cov-report=term-missing --cov-append -vv} nocov: {posargs:py.test -vv tests}