diff --git a/lib/rift/Annex.py b/lib/rift/Annex.py index 49ca08b8..39eeae00 100644 --- a/lib/rift/Annex.py +++ b/lib/rift/Annex.py @@ -90,7 +90,7 @@ def is_binary(filepath, blocksize=65536): def hashfile(filepath, iosize=65536): """Compute a digest of filepath content.""" - hasher = hashlib.md5() + hasher = hashlib.sha3_256() with open(filepath, 'rb') as srcfile: buf = srcfile.read(iosize) while len(buf) > 0: @@ -123,10 +123,13 @@ def is_pointer(cls, filepath): identifier. """ meta = os.stat(filepath) - if meta.st_size == 32: + + # MD5 or SHA3 256 + if meta.st_size in (32, 64): with open(filepath, encoding='utf-8') as fh: - identifier = fh.read(32) + identifier = fh.read(meta.st_size) return all(byte in string.hexdigits for byte in identifier) + return False def get(self, identifier, destpath): @@ -223,16 +226,34 @@ def list(self): insertion time. """ for filename in os.listdir(self.path): - if not filename.endswith('.info'): - info = self._load_metadata(filename) - names = info.get('filenames', []) - for annexed_file in names.values(): - insertion_time = annexed_file['date'] - insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp() - - #The file size must come from the filesystem + if filename.endswith(_INFOSUFFIX): + continue + + info = self._load_metadata(filename) + names = info.get('filenames', []) + for annexed_file, details in names.items(): + insertion_time = details['date'] + + # Handle different date formats (old method) + if isinstance(insertion_time, str): + for fmt in ('%a %b %d %H:%M:%S %Y', '%a %d %b %Y %H:%M:%S %p %Z'): + try: + insertion_time = datetime.datetime.strptime( + insertion_time, fmt + ).timestamp() + break + except ValueError: + continue + else: + logging.warning( + "Unknown time format: %s (type %s)", + insertion_time, + type(insertion_time) + ) + + # The file size must come from the filesystem meta = os.stat(os.path.join(self.path, filename)) - yield filename, meta.st_size, insertion_time, names + yield filename, 
meta.st_size, insertion_time, [annexed_file] def push(self, filepath): """ @@ -254,21 +275,21 @@ def push(self, filepath): destinfo = None if os.path.exists(destpath): destinfo = os.stat(destpath) - if destinfo and destinfo.st_size == originfo.st_size and \ - filename in metadata.get('filenames', {}): - logging.debug('%s is already into annex, skipping it', filename) - - else: - # Update them and write them back - fileset = metadata.setdefault('filenames', {}) - fileset.setdefault(filename, {}) - fileset[filename]['date'] = time.strftime("%c") - self._save_metadata(digest, metadata) - - # Move binary file to annex - logging.debug('Importing %s into annex (%s)', filepath, digest) - shutil.copyfile(filepath, destpath) - os.chmod(destpath, self.WMODE) + if destinfo and destinfo.st_size == originfo.st_size and \ + filename in metadata.get('filenames', {}): + logging.debug('%s is already into annex, skipping it', filename) + return + + # Update them and write them back + fileset = metadata.setdefault('filenames', {}) + fileset.setdefault(filename, {}) + fileset[filename]['date'] = time.time() # Unix timestamp + self._save_metadata(digest, metadata) + + # Move binary file to annex + logging.debug('Importing %s into annex (%s)', filepath, digest) + shutil.copyfile(filepath, destpath) + os.chmod(destpath, self.WMODE) # Verify permission are correct before copying os.chmod(filepath, self.RMODE) diff --git a/lib/rift/Controller.py b/lib/rift/Controller.py index 02684272..c05ecc7a 100644 --- a/lib/rift/Controller.py +++ b/lib/rift/Controller.py @@ -336,8 +336,12 @@ def action_annex(args, config, staff, modules): print(fmt % ('ID', 'SIZE', 'DATE', 'FILENAMES')) print(fmt % ('--', '----', '----', '---------')) for filename, size, mtime, names in annex.list(): - timestr = time.strftime('%x %X', time.localtime(mtime)) - print(fmt % (filename, size, timestr, ','.join(names))) + try: + timestr = time.strftime('%x %X', time.localtime(mtime)) + print(fmt % (filename, size, timestr, 
','.join(names))) + + except TypeError: + print(fmt % (filename, size, mtime, ','.join(names))) elif args.annex_cmd == 'push': for srcfile in args.files: diff --git a/tests/Annex.py b/tests/Annex.py index 36a51541..5e814dd6 100644 --- a/tests/Annex.py +++ b/tests/Annex.py @@ -192,22 +192,17 @@ def test_delete(self): self.annex.get_by_path(source_file.name, '/dev/null') def test_list(self): - """ Test list method """ + """Test the list method""" source_size = os.stat(self.source.name).st_size - source_insertion_time = datetime.datetime.strptime(time.strftime('%c'), '%c').timestamp() - # Get the current time with the %c format and convert it to unix timestamp to - # have the same method as annex.list (in terms of precision) + source_insertion_time = time.time() self.annex.push(self.source.name) - # Check if the file pointer is present in the annex list output - # by checking it's attributes for filename, size, insertion_time, names in self.annex.list(): self.assertEqual(get_digest_from_path(self.source.name), filename) self.assertEqual(source_size, size) - # As tests can take time to run, accept less or equal 1 second shift - self.assertAlmostEqual(source_insertion_time, insertion_time, delta=1) - self.assertTrue(os.path.basename(self.source.name) in names.keys()) + self.assertAlmostEqual(source_insertion_time, insertion_time, delta=1) # delta for potential delays + self.assertTrue(os.path.basename(self.source.name) in names) def test_push(self): """ Test push method """