Skip to content

Commit 7f755ec

Browse files
author
Adrian
authored
Refactored vsync to use threading and work with Python 3 (CADC-8744) (#198)
* Refactored vsync to use threading and work with Python 3
1 parent e11c30a commit 7f755ec

File tree

6 files changed

+731
-375
lines changed

6 files changed

+731
-375
lines changed

vos/setup.cfg

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ license = AGPLv3
4848
url = https://www.canfar.net/en/docs/storage
4949
edit_on_github = False
5050
github_project = opencadc/vostools
51-
install_requires = html2text>=2016.5.29 cadcutils>=1.2.1 future aenum
51+
install_requires = html2text>=2016.5.29 cadcutils>=1.2.6 future aenum
5252
# version should be PEP440 compatible (http://www.python.org/dev/peps/pep-0440)
53-
version = 3.3.4
53+
version = 3.3.5
5454

5555

5656
[entry_points]
@@ -67,3 +67,4 @@ vrmdir = vos.commands.vrmdir:vrmdir
6767
vsync = vos.commands.vsync:vsync
6868
vtag = vos.commands.vtag:vtag
6969
vos-config = vos.vosconfig:vos_config_main
70+
Lines changed: 379 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,379 @@
1+
# # -*- coding: utf-8 -*-
2+
# ***********************************************************************
3+
# ****************** CANADIAN ASTRONOMY DATA CENTRE *******************
4+
# ************* CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES **************
5+
#
6+
# (c) 2021. (c) 2021.
7+
# Government of Canada Gouvernement du Canada
8+
# National Research Council Conseil national de recherches
9+
# Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6
10+
# All rights reserved Tous droits réservés
11+
#
12+
# NRC disclaims any warranties, Le CNRC dénie toute garantie
13+
# expressed, implied, or énoncée, implicite ou légale,
14+
# statutory, of any kind with de quelque nature que ce
15+
# respect to the software, soit, concernant le logiciel,
16+
# including without limitation y compris sans restriction
17+
# any warranty of merchantability toute garantie de valeur
18+
# or fitness for a particular marchande ou de pertinence
19+
# purpose. NRC shall not be pour un usage particulier.
20+
# liable in any event for any Le CNRC ne pourra en aucun cas
21+
# damages, whether direct or être tenu responsable de tout
22+
# indirect, special or general, dommage, direct ou indirect,
23+
# consequential or incidental, particulier ou général,
24+
# arising from the use of the accessoire ou fortuit, résultant
25+
# software. Neither the name de l'utilisation du logiciel. Ni
26+
# of the National Research le nom du Conseil National de
27+
# Council of Canada nor the Recherches du Canada ni les noms
28+
# names of its contributors may de ses participants ne peuvent
29+
# be used to endorse or promote être utilisés pour approuver ou
30+
# products derived from this promouvoir les produits dérivés
31+
# software without specific prior de ce logiciel sans autorisation
32+
# written permission. préalable et particulière
33+
# par écrit.
34+
#
35+
# This file is part of the Ce fichier fait partie du projet
36+
# OpenCADC project. OpenCADC.
37+
#
38+
# OpenCADC is free software: OpenCADC est un logiciel libre ;
39+
# you can redistribute it and/or vous pouvez le redistribuer ou le
40+
# modify it under the terms of modifier suivant les termes de
41+
# the GNU Affero General Public la “GNU Affero General Public
42+
# License as published by the License” telle que publiée
43+
# Free Software Foundation, par la Free Software Foundation
44+
# either version 3 of the : soit la version 3 de cette
45+
# License, or (at your option) licence, soit (à votre gré)
46+
# any later version. toute version ultérieure.
47+
#
48+
# OpenCADC is distributed in the OpenCADC est distribué
49+
# hope that it will be useful, dans l’espoir qu’il vous
50+
# but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE
51+
# without even the implied GARANTIE : sans même la garantie
52+
# warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ
53+
# or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF
54+
# PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence
55+
# General Public License for Générale Publique GNU Affero
56+
# more details. pour plus de détails.
57+
#
58+
# You should have received Vous devriez avoir reçu une
59+
# a copy of the GNU Affero copie de la Licence Générale
60+
# General Public License along Publique GNU Affero avec
61+
# with OpenCADC. If not, see OpenCADC ; si ce n’est
62+
# <http://www.gnu.org/licenses/>. pas le cas, consultez :
63+
# <http://www.gnu.org/licenses/>.
64+
#
65+
# $Revision: 4 $
66+
#
67+
# ***********************************************************************
68+
#
69+
from __future__ import (absolute_import, division, print_function,
70+
unicode_literals)
71+
72+
import tempfile
73+
import os
74+
import importlib
75+
import datetime
76+
import pytest
77+
import mock
78+
from mock import Mock
79+
import hashlib
80+
81+
from vos.commands.vsync import validate, prepare, build_file_list, execute, \
82+
TransferReport, compute_md5
83+
from cadcutils import exceptions as transfer_exceptions
84+
from vos.vos import ZERO_MD5
85+
86+
87+
def module_patch(*args):
    """
    Replacement for mock.patch, needed because the vsync module also
    defines a function called vsync.
    Credit: https://stackoverflow.com/questions/52324568/how-to-mock-a-
    function-called-in-a-function-inside-a-module-with-the-same-name
    :param args: positional arguments forwarded to mock.patch; args[0] is
    the dotted path of the target to patch
    :return: the patcher object created by mock.patch
    """
    parts = args[0].split('.')
    # try the longest dotted prefix first, then progressively shorter ones
    for split_at in reversed(range(1, len(parts) + 1)):
        module_name = '.'.join(parts[:split_at])
        attribute_name = '.'.join(parts[split_at:])
        try:
            module = importlib.import_module(module_name)

            # the prefix is an importable module: point the patcher at it
            patcher = mock.patch(*args)
            patcher.getter = lambda: module
            patcher.attribute = attribute_name
            return patcher
        except Exception:
            # best effort: fall through to the next (shorter) prefix
            continue

    # no prefix could be imported, just return the default mock
    return mock.patch(*args)
113+
114+
115+
def test_compute_md5():
    """
    compute_md5 returns ZERO_MD5 for an empty file and the md5 hex digest
    of the content otherwise, including after the file has been rewritten.
    """
    def rewrite(path, content):
        # close the handle explicitly so the content is on disk before the
        # checksum is computed (do not rely on garbage collection to flush)
        with open(path, 'wb') as target:
            target.write(content)
        return hashlib.md5(content).hexdigest()

    with tempfile.NamedTemporaryFile() as tmp_file:
        assert compute_md5(tmp_file.name) == ZERO_MD5

        expected_md5 = rewrite(tmp_file.name, b'abc')
        assert compute_md5(tmp_file.name) == expected_md5
        # try again to use cache
        assert compute_md5(tmp_file.name) == expected_md5
        # make the cache stale
        expected_md5 = rewrite(tmp_file.name, b'cba')
        assert compute_md5(tmp_file.name) == expected_md5
132+
133+
134+
def test_validate():
    """
    Exercise validate with include/exclude filters and illegal characters.
    """
    # (expected outcome, path, keyword filters) - evaluated in this order
    cases = [
        (True, 'somepath', {}),
        (True, 'somepath', {'exclude': '.'}),
        (False, '.hiddenfile', {'exclude': '.'}),
        (False, 'file.fits.tmp', {'exclude': 'tmp'}),
        (False, 'file.fits.tmp', {'exclude': 'exe,tmp'}),
        (True, 'somepath', {'include': '.?me.?'}),
        (False, 'sopath', {'include': '.?me.?'}),
        # exclude wins
        (False, 'somepath', {'include': '.?me.?', 'exclude': 'me'}),
        # illegal characters
        (False, 'ab[cd', {}),
    ]
    for accepted, path, filters in cases:
        outcome = validate(path, **filters)
        if accepted:
            assert outcome
        else:
            assert not outcome
146+
147+
148+
def test_prepare():
    """
    prepare creates remote directories through client.mkdir, skips
    symbolic links and tolerates directories that already exist remotely.
    """
    client = Mock()
    vos_location = 'vos:someservice/somedir'
    with tempfile.NamedTemporaryFile() as tmp_file:
        # a regular file: nothing is created on the server. The result is
        # compared as a tuple; the former `assert tmp_file, ... == ...` form
        # treated the comparison as the assert message and could never fail.
        # NOTE(review): assumes prepare returns (src, dest) for a plain
        # file - confirm against vsync.prepare
        assert (tmp_file.name, vos_location) == prepare(
            tmp_file.name, vos_location, client)
        assert not client.mkdir.called

        with tempfile.TemporaryDirectory() as tmp_dir:
            src_dir = os.path.join(tmp_dir, 'vsyncsrc')
            os.mkdir(src_dir)
            assert not prepare(src_dir, vos_location, client)
            client.mkdir.assert_called_with(vos_location)

            # symlinks are not synced
            client.mkdir.reset_mock()
            link_file = os.path.join(src_dir, 'filelink')
            os.symlink(tmp_file.name, link_file)
            assert not prepare(link_file, vos_location, client)
            assert not client.mkdir.called

    # directory exists on the server
    client.mkdir.reset_mock()
    client.mkdir.side_effect = transfer_exceptions.AlreadyExistsException
    with tempfile.TemporaryDirectory() as tmp_dir:
        assert not prepare(tmp_dir, vos_location, client)
        client.mkdir.assert_called_with(vos_location)
175+
176+
177+
def test_build_file_list():
    """
    build_file_list returns (local path, vos uri) pairs for the given
    sources, honouring the recursive, include and exclude arguments and the
    trailing "/" convention (sync the content of a directory, not the
    directory itself).
    """
    def check_list(expected, actual):
        """
        Checks the list of expected entries against the actual one. The
        order of the entries is not checked: only the length of the lists
        and the presence of every expected entry are.
        """
        assert len(actual) == len(expected)
        for item in expected:
            assert item in actual

    def write_file(path):
        # close the handle explicitly so no descriptor is left open
        with open(path, 'w') as target:
            target.write('test')

    # normally name of the src directory is part of this but we keep this
    # for simplicity
    vos_root = 'vos:someservice/somepath'

    def get_vos_path(path, sync_dir):
        """
        Expected vos uri of a local path: relative to sync_dir itself when
        it ends with "/", relative to the parent of sync_dir otherwise.
        """
        if sync_dir.endswith('/'):
            base_dir = sync_dir
        else:
            base_dir = os.path.dirname(sync_dir)
        uri_path = os.path.relpath(path, base_dir)
        return '{}/{}'.format(vos_root, uri_path)

    with tempfile.TemporaryDirectory() as tmp_dir:
        src_dir_name = 'syncsrc'
        src_dir = os.path.join(tmp_dir, src_dir_name)
        os.mkdir(src_dir)

        check_list([(src_dir, get_vos_path(src_dir, src_dir))],
                   build_file_list([src_dir], vos_root))

        file1 = 'file1'
        file1_path = os.path.join(src_dir, file1)
        write_file(file1_path)

        expected_list = [(src_dir, get_vos_path(src_dir, src_dir)),
                         (file1_path, get_vos_path(file1_path, src_dir))]
        check_list(expected_list, build_file_list([src_dir], vos_root))

        dir1 = 'dir1'
        dir1_path = os.path.join(src_dir, dir1)
        os.mkdir(dir1_path)
        file2 = 'file2'
        file2_path = os.path.join(dir1_path, file2)
        write_file(file2_path)
        dir2 = 'dir2'
        dir2_path = os.path.join(src_dir, dir2)
        os.mkdir(dir2_path)

        # if not recursive we get the same result as the previous test
        check_list(expected_list, build_file_list([src_dir], vos_root))

        # now recursive
        expected_list = \
            [(src_dir, get_vos_path(src_dir, src_dir)),
             (dir1_path, get_vos_path(dir1_path, src_dir)),
             (dir2_path, get_vos_path(dir2_path, src_dir)),
             (file1_path, get_vos_path(file1_path, src_dir)),
             (file2_path, get_vos_path(file2_path, src_dir))]
        check_list(expected_list, build_file_list(
            [src_dir], vos_root, recursive=True))

        # repeat but now add "/" at the end of the source. This syncs just
        # the content of the dir and not the dir itself
        src_dir_content = src_dir + '/'
        expected_list_content = \
            [(dir1_path, get_vos_path(dir1_path, src_dir_content)),
             (dir2_path, get_vos_path(dir2_path, src_dir_content)),
             (file1_path, get_vos_path(file1_path, src_dir_content)),
             (file2_path, get_vos_path(file2_path, src_dir_content))]
        check_list(expected_list_content, build_file_list(
            [src_dir_content], vos_root, recursive=True))

        # path='syncsrc' and vos_root='vos:someservice/somepath' should
        # generate the same list as path='syncsrc/' and
        # vos_root='vos:someservice/somepath/syncsrc' with the exception of
        # the entry corresponding to the 'syncsrc' directory which is not
        # generated in the second case (but assumed to already exist on
        # server)
        expected_list.pop(0)
        check_list(expected_list, build_file_list(
            [src_dir_content], '{}/{}'.format(vos_root, src_dir_name),
            recursive=True))

        # filtered results
        expected_list = \
            [(src_dir, get_vos_path(src_dir, src_dir)),
             (dir1_path, get_vos_path(dir1_path, src_dir)),
             (file1_path, get_vos_path(file1_path, src_dir)),
             (file2_path, get_vos_path(file2_path, src_dir))]
        check_list(expected_list, build_file_list(
            [src_dir], vos_root, recursive=True, include="1"))

        # repeat with no recursive
        expected_list = \
            [(src_dir, get_vos_path(src_dir, src_dir)),
             (file1_path, get_vos_path(file1_path, src_dir))]
        check_list(expected_list, build_file_list(
            [src_dir], vos_root, recursive=False, include="1"))

        # filter with exclude
        expected_list = \
            [(src_dir, get_vos_path(src_dir, src_dir)),
             (dir1_path, get_vos_path(dir1_path, src_dir)),
             (file1_path, get_vos_path(file1_path, src_dir))]
        check_list(expected_list, build_file_list(
            [src_dir], vos_root, recursive=True, exclude="2"))

        # redo while doubling up the list
        check_list(expected_list, build_file_list(
            [src_dir]*2, vos_root, recursive=True, exclude="2"))

        # sync src_dir + a file
        expected_list.append((file1_path, '{}/{}'.format(vos_root, file1)))
        check_list(expected_list, build_file_list(
            [src_dir, file1_path], vos_root, recursive=True, exclude="2"))

        # error when the src file does not exist
        with pytest.raises(ValueError):
            build_file_list([src_dir, 'bogus'], vos_root)
295+
296+
297+
def test_transfer_report():
    """
    Every counter of a newly created TransferReport is falsy.
    """
    report = TransferReport()
    counters = ('files_erred', 'files_sent', 'files_skipped',
                'bytes_sent', 'bytes_skipped')
    for counter in counters:
        assert not getattr(report, counter)
304+
305+
306+
@module_patch('vos.commands.vsync.get_client')
def test_execute(get_client):
    """
    execute reports a file as sent, skipped or erred depending on the
    overwrite and ignore_checksum options and on the MD5, size and ctime
    reported by the (mocked) remote node.
    :param get_client: mock injected in place of vsync.get_client
    """
    def set_content(path, content):
        # close the handle explicitly so the size/checksum seen by execute
        # reflects the new content (do not rely on garbage collection)
        with open(path, 'w') as target:
            target.write(content)

    now = datetime.datetime.now().timestamp()
    node = Mock(props={'MD5': 'beef'}, attr={'st_size': 3, 'st_ctime': now})
    client_mock = Mock()
    client_mock.get_node = Mock(return_value=node)
    get_client.return_value = client_mock
    tmp_file = tempfile.NamedTemporaryFile()

    class Options:
        pass

    # use an instance so the attributes do not end up on the class itself
    options = Options()
    options.overwrite = True
    options.ignore_checksum = True
    options.certfile = None
    options.token = None
    options.cache_nodes = False
    expected_report = TransferReport()
    expected_report.files_sent = 1
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # put some content in the file
    set_content(tmp_file.name, 'ABC')
    expected_report.bytes_sent = 3
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # no overwrite, same md5 and remote ctime set to now (i.e. after the
    # local file was written) => no update
    now = datetime.datetime.now().timestamp()
    node.attr['st_ctime'] = now
    md5 = compute_md5(tmp_file.name)
    node.props['MD5'] = md5
    options.overwrite = False
    expected_report = TransferReport()
    expected_report.files_skipped = 1
    expected_report.bytes_skipped = 3
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # mismatched md5 but ignore checksum => no update
    node.props['MD5'] = 'beef'
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # mismatched md5 and no ignore checksum => update
    options.ignore_checksum = False
    expected_report = TransferReport()
    expected_report.files_sent = 1
    expected_report.bytes_sent = 3
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # ignore checksum but mismatched size => update
    options.ignore_checksum = True
    node.props['MD5'] = md5
    node.attr['st_size'] = 7
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # stale remote copy (remote ctime well before now) => update
    node.attr['st_size'] = 3
    node.attr['st_ctime'] = now - 10000
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

    # OSErrors on update
    client_mock.copy.side_effect = OSError('NodeLocked')
    expected_report = TransferReport()
    expected_report.files_erred = 1
    assert expected_report == execute(tmp_file.name,
                                      'vos:service/path', options)

0 commit comments

Comments
 (0)