From 4ef84a62993f80ee6870728db2766b033eba136b Mon Sep 17 00:00:00 2001
From: Quentin ANIERE
Date: Tue, 26 Aug 2025 11:16:52 +0200
Subject: [PATCH 1/3] Annex.py: switch hash algorithm to SHA3-256

Compute annex identifiers with SHA3-256 instead of MD5.

Pointer files holding a 32-character MD5 identifier are still
recognised. The deprecation warning is only emitted once the content has
been checked to be hexadecimal, so that an unrelated 32-byte file does
not trigger it. 64-character SHA3-256 identifiers are accepted as well.

---
 lib/rift/Annex.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/lib/rift/Annex.py b/lib/rift/Annex.py
index 49ca08b8..0da941ad 100644
--- a/lib/rift/Annex.py
+++ b/lib/rift/Annex.py
@@ -90,7 +90,7 @@ def is_binary(filepath, blocksize=65536):
 
 def hashfile(filepath, iosize=65536):
     """Compute a digest of filepath content."""
-    hasher = hashlib.md5()
+    hasher = hashlib.sha3_256()
     with open(filepath, 'rb') as srcfile:
         buf = srcfile.read(iosize)
         while len(buf) > 0:
@@ -123,10 +123,20 @@ def is_pointer(cls, filepath):
         identifier.
         """
         meta = os.stat(filepath)
-        if meta.st_size == 32:
-            with open(filepath, encoding='utf-8') as fh:
-                identifier = fh.read(32)
-            return all(byte in string.hexdigits for byte in identifier)
-        return False
+
+        # A pointer file only holds a hexadecimal digest: 32 characters
+        # for the deprecated MD5 identifiers, 64 for SHA3-256 ones.
+        if meta.st_size not in (32, 64):
+            return False
+
+        with open(filepath, encoding='utf-8') as fh:
+            identifier = fh.read(meta.st_size)
+        if not all(byte in string.hexdigits for byte in identifier):
+            return False
+
+        # Warn only once the file is known to be a genuine MD5 pointer
+        if meta.st_size == 32:
+            logging.warning("Using deprecated hash algorithm (MD5)")
+        return True
 
     def get(self, identifier, destpath):
From cb523262b194252e1fc6a2b4ff3b756539250ac7 Mon Sep 17 00:00:00 2001
From: Quentin ANIERE
Date: Tue, 26 Aug 2025 13:37:05 +0200
Subject: [PATCH 2/3] Annex.py: Switch date to UNIX timestamp

Store the insertion date of annexed files as a UNIX timestamp
(time.time()) instead of a locale-dependent '%c' string.

Annex.list() keeps reading the string dates found in existing metadata.
The 12-hour variant is parsed with %I, because strptime() only honours
%p (AM/PM) when the hour is parsed with %I. The names field yielded by
Annex.list() is now a single-item list holding the annexed file name.

Annex.push() keeps its if/else structure so that the steps following the
import still run when the file is already in the annex.

---
 lib/rift/Annex.py | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/lib/rift/Annex.py b/lib/rift/Annex.py
index 0da941ad..4713abd5 100644
--- a/lib/rift/Annex.py
+++ b/lib/rift/Annex.py
@@ -236,13 +236,27 @@ def list(self):
             if not filename.endswith('.info'):
                 info = self._load_metadata(filename)
                 names = info.get('filenames', [])
-                for annexed_file in names.values():
-                    insertion_time = annexed_file['date']
-                    insertion_time = datetime.datetime.strptime(insertion_time, "%c").timestamp()
-
-                #The file size must come from the filesystem
-                meta = os.stat(os.path.join(self.path, filename))
-                yield filename, meta.st_size, insertion_time, names
+                for annexed_file, details in names.items():
+                    insertion_time = details['date']
+
+                    # Handle different date formats (old method)
+                    if isinstance(insertion_time, str):
+                        for fmt in ('%a %b %d %H:%M:%S %Y', '%a %d %b %Y %I:%M:%S %p %Z'):
+                            try:
+                                insertion_time = datetime.datetime.strptime(insertion_time, fmt).timestamp()
+                                break
+                            except ValueError:
+                                continue
+                        else:
+                            raise ValueError(f"Invalid date format in metadata: {insertion_time}")
+
+                    # Anything else must already be a UNIX timestamp
+                    elif not isinstance(insertion_time, (int, float)):
+                        raise ValueError("Invalid date format in metadata")
+
+                    # The file size must come from the filesystem
+                    meta = os.stat(os.path.join(self.path, filename))
+                    yield filename, meta.st_size, insertion_time, [annexed_file]
 
     def push(self, filepath):
         """
@@ -272,7 +286,7 @@ def push(self, filepath):
             # Update them and write them back
             fileset = metadata.setdefault('filenames', {})
             fileset.setdefault(filename, {})
-            fileset[filename]['date'] = time.strftime("%c")
+            fileset[filename]['date'] = time.time()  # UNIX timestamp
             self._save_metadata(digest, metadata)
 
             # Move binary file to annex
From 6bc1e338f3e6c4a4f1fb16ed9da50fe3e509462b Mon Sep 17 00:00:00 2001
From: Quentin ANIERE
Date: Tue, 26 Aug 2025 14:00:49 +0200
Subject: [PATCH 3/3] Annex.py: Fix tests

Adapt test_list to the UNIX timestamp stored by Annex.push() and to the
list of names yielded by Annex.list().

---
 tests/Annex.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tests/Annex.py b/tests/Annex.py
index 36a51541..5e814dd6 100644
--- a/tests/Annex.py
+++ b/tests/Annex.py
@@ -192,22 +192,18 @@ def test_delete(self):
             self.annex.get_by_path(source_file.name, '/dev/null')
 
     def test_list(self):
-        """ Test list method """
+        """Test the list method"""
         source_size = os.stat(self.source.name).st_size
-        source_insertion_time = datetime.datetime.strptime(time.strftime('%c'), '%c').timestamp()
-        # Get the current time with the %c format and convert it to unix timestamp to
-        # have the same method as annex.list (in terms of precision)
+        source_insertion_time = time.time()
 
         self.annex.push(self.source.name)
 
-        # Check if the file pointer is present in the annex list output
-        # by checking it's attributes
         for filename, size, insertion_time, names in self.annex.list():
             self.assertEqual(get_digest_from_path(self.source.name), filename)
             self.assertEqual(source_size, size)
             # As tests can take time to run, accept less or equal 1 second shift
             self.assertAlmostEqual(source_insertion_time, insertion_time, delta=1)
-            self.assertTrue(os.path.basename(self.source.name) in names.keys())
+            self.assertIn(os.path.basename(self.source.name), names)
 
     def test_push(self):
         """ Test push method """