Skip to content

Commit 7c07847

Browse files
authored
Merge pull request #650 from jkloetzke/fix-gunzip-extract
Fix gunzip extract
2 parents: 241b862 + a87fda6; commit 7c07847

File tree

2 files changed

+53
-20
lines changed

2 files changed

+53
-20
lines changed

pym/bob/scm/url.py

Lines changed: 21 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,14 @@ async def _extract(self, cmds, invoker, dirCreated, stdout=None):
175175
if dirCreated or isYounger(destination, canary):
176176
for cmd in cmds:
177177
if shutil.which(cmd[0]) is None: continue
178-
await invoker.checkCommand(cmd, cwd=self.dir, stdout=stdout)
178+
if stdout:
179+
# Extracts to stdout. We must open the destination only
180+
# after we have confirmed that we run the extractor!
181+
# Otherwise we truncate the file.
182+
with open(stdout, 'wb') as f:
183+
await invoker.checkCommand(cmd, cwd=self.dir, stdout=f)
184+
else:
185+
await invoker.checkCommand(cmd, cwd=self.dir, stdout=stdout)
179186
invoker.trace("<touch>", canary)
180187
with open(canary, "wb") as f:
181188
pass
@@ -246,9 +253,8 @@ async def extract(self, invoker, dirCreated):
246253
else:
247254
raise BuildError("unkown suffix")
248255

249-
with open(dst, 'wb') as f:
250-
cmd = [self.CMD, "-c", src]
251-
await self._extract([cmd], invoker, dirCreated, f)
256+
cmd = [self.CMD, "-c", src]
257+
await self._extract([cmd], invoker, dirCreated, dst)
252258

253259
shutil.copystat(src, dst)
254260

@@ -527,7 +533,7 @@ def _download(self, url, destination, mode):
527533

528534
return True, None
529535

530-
async def _fetch(self, invoker, url, workspaceFile, destination, mode):
536+
async def _fetch(self, invoker, url, destination, mode):
531537
if url.scheme in ['', 'file']:
532538
# Verify that host name is empty or "localhost"
533539
if url.netloc not in ['', 'localhost']:
@@ -537,7 +543,7 @@ async def _fetch(self, invoker, url, workspaceFile, destination, mode):
537543
if isYounger(url.path, destination):
538544
if os.path.isdir(destination):
539545
invoker.fail("Destination", destination, "is an existing directory!")
540-
invoker.trace("<cp>", url.path, workspaceFile)
546+
invoker.trace("<cp>", url.path, destination)
541547
with tempfile.TemporaryDirectory(dir=os.path.dirname(destination)) as tmpDir:
542548
tmpFile = os.path.join(tmpDir, self.__fn)
543549
# Keep mtime when copying. Otherwise we would update
@@ -553,8 +559,7 @@ async def _fetch(self, invoker, url, workspaceFile, destination, mode):
553559
elif url.scheme in ["http", "https", "ftp"]:
554560
retries = self.__retries
555561
while True:
556-
invoker.trace("<wget>", url.geturl(), ">",
557-
workspaceFile, "retires:", retries)
562+
invoker.trace("<wget>", url.geturl(), ">", destination, "retires:", retries)
558563
try:
559564
updated, err = await invoker.runInExecutor(UrlScm._download, self, url, destination, mode)
560565
if err:
@@ -613,7 +618,7 @@ def _upload(self, source, url):
613618

614619
return None
615620

616-
async def _put(self, invoker, workspaceFile, source, url):
621+
async def _put(self, invoker, source, url):
617622
if url.scheme in ['', 'file']:
618623
# Verify that host name is empty or "localhost"
619624
if url.netloc not in ['', 'localhost']:
@@ -624,7 +629,7 @@ async def _put(self, invoker, workspaceFile, source, url):
624629
if isYounger(source, url.path):
625630
if os.path.isdir(url.path):
626631
invoker.fail("Destination", url.path, "is an existing directory!")
627-
invoker.trace("<cp>", workspaceFile, url.path)
632+
invoker.trace("<cp>", source, url.path)
628633
destDir = os.path.dirname(url.path)
629634
os.makedirs(destDir, exist_ok=True)
630635
with tempfile.TemporaryDirectory(dir=destDir) as tmpDir:
@@ -634,7 +639,7 @@ async def _put(self, invoker, workspaceFile, source, url):
634639
elif url.scheme in ["http", "https"]:
635640
retries = self.__retries
636641
while True:
637-
invoker.trace("<wput>", workspaceFile, ">", url.geturl(), "retires:", retries)
642+
invoker.trace("<wput>", source, ">", url.geturl(), "retires:", retries)
638643
try:
639644
err = await invoker.runInExecutor(UrlScm._upload, self, source, url)
640645
if err:
@@ -689,7 +694,6 @@ async def invoke(self, invoker, workspaceCreated):
689694
if not os.path.isdir(invoker.joinPath(self.__dir)):
690695
os.makedirs(invoker.joinPath(self.__dir), exist_ok=True)
691696
workspaceCreated = True
692-
workspaceFile = os.path.join(self.__dir, self.__fn)
693697
extractor = self.__getExtractor()
694698

695699
destination = invoker.joinPath(self.__dir, self.__fn)
@@ -714,7 +718,7 @@ async def invoke(self, invoker, workspaceCreated):
714718
if err:
715719
# Output previously failed download attempt as warning
716720
invoker.warn(err)
717-
downloaded, err = await self._fetch(invoker, url, workspaceFile, destination, mode)
721+
downloaded, err = await self._fetch(invoker, url, destination, mode)
718722
if err is None:
719723
break
720724
else:
@@ -725,17 +729,17 @@ async def invoke(self, invoker, workspaceCreated):
725729

726730
# Always verify file hashes
727731
if self.__digestSha1:
728-
invoker.trace("<sha1sum>", workspaceFile)
732+
invoker.trace("<sha1sum>", destination)
729733
d = hashFile(destination, hashlib.sha1).hex()
730734
if d != self.__digestSha1:
731735
invoker.fail("SHA1 digest did not match! expected:", self.__digestSha1, "got:", d)
732736
if self.__digestSha256:
733-
invoker.trace("<sha256sum>", workspaceFile)
737+
invoker.trace("<sha256sum>", destination)
734738
d = hashFile(destination, hashlib.sha256).hex()
735739
if d != self.__digestSha256:
736740
invoker.fail("SHA256 digest did not match! expected:", self.__digestSha256, "got:", d)
737741
if self.__digestSha512:
738-
invoker.trace("<sha512sum>", workspaceFile)
742+
invoker.trace("<sha512sum>", destination)
739743
d = hashFile(destination, hashlib.sha512).hex()
740744
if d != self.__digestSha512:
741745
invoker.fail("SHA512 digest did not match! expected:", self.__digestSha512, "got:", d)
@@ -745,7 +749,7 @@ async def invoke(self, invoker, workspaceCreated):
745749
if downloaded:
746750
for url, upload in urls:
747751
if upload:
748-
await self._put(invoker, workspaceFile, destination, url)
752+
await self._put(invoker, destination, url)
749753

750754
# Run optional extractors
751755
if extractor is not None:

test/unit/test_input_urlscm.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,7 @@ def testNoResponse(self):
341341
with self.assertRaises(InvocationError):
342342
self.invokeScm(workspace, scm)
343343

344-
class TestExtraction:
344+
class TestExtraction(TestCase):
345345

346346
@classmethod
347347
def setUpClass(cls):
@@ -355,7 +355,7 @@ def setUpClass(cls):
355355
f.write("Hello world!")
356356

357357
cls.tarGzFile = os.path.join(cls.dir, "test.tar.gz")
358-
subprocess.run(["tar", "-zcf", cls.tarGzFile, src],
358+
subprocess.run(["tar", "-C", cls.dir, "-zcf", "test.tar.gz", "src"],
359359
cwd=cls.dir, check=True)
360360
with open(cls.tarGzFile, "rb") as f:
361361
cls.tarGzDigestSha1 = hashlib.sha1(f.read()).digest().hex()
@@ -437,14 +437,43 @@ def testGz(self):
437437
self.assertExists(os.path.join(workspace, "test.txt.gz"))
438438
self.assertExists(os.path.join(workspace, "test.txt"))
439439

440+
def testGzDoubleInvocation(self):
441+
"""Invoking the SCM twice does not touch the extracted file"""
442+
scm = self.createUrlScm({
443+
"url" : self.gzFile,
444+
"digestSHA256" : self.gzDigestSha256,
445+
})
446+
with TemporaryWorkspace() as workspace:
447+
self.invokeScm(workspace, scm)
448+
449+
test_txt_gz = os.path.join(workspace, "test.txt.gz")
450+
test_txt = os.path.join(workspace, "test.txt")
451+
with open(test_txt_gz, "rb") as f:
452+
self.assertEqual(self.gzDigestSha256,
453+
hashlib.sha256(f.read()).digest().hex())
454+
with open(test_txt, "r") as f:
455+
self.assertEqual("Hello world!", f.read())
456+
first_txt_gz_ts = os.stat(test_txt_gz).st_mtime_ns
457+
first_txt_ts = os.stat(test_txt).st_mtime_ns
458+
459+
self.invokeScm(workspace, scm)
460+
with open(test_txt_gz, "rb") as f:
461+
self.assertEqual(self.gzDigestSha256,
462+
hashlib.sha256(f.read()).digest().hex())
463+
with open(test_txt, "r") as f:
464+
self.assertEqual("Hello world!", f.read())
465+
self.assertEqual(first_txt_gz_ts, os.stat(test_txt_gz).st_mtime_ns)
466+
self.assertEqual(first_txt_ts, os.stat(test_txt).st_mtime_ns)
467+
468+
440469
def testGzStripComponentsNotSupported(self):
441470
scm = self.createUrlScm({
442471
"url" : self.gzFile,
443472
"digestSHA256" : self.gzDigestSha256,
444473
"stripComponents" : 1,
445474
})
446475
with TemporaryWorkspace() as workspace:
447-
with self.assertRaises(InvocationError):
476+
with self.assertRaises(BuildError):
448477
self.invokeScm(workspace, scm)
449478

450479

0 commit comments

Comments
 (0)