- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 33.2k
          gh-130819: Update tarfile.py#_create_gnu_long_header to align with GNU Tar
          #130820
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -895,6 +895,9 @@ class TarInfo(object): | |
| _link_target = None, | ||
| ) | ||
|  | ||
| _name_uid0 = None # Cached uname of uid=0 | ||
| _name_gid0 = None # Cached gname of gid=0 | ||
| 
      Comment on lines
    
      +898
     to 
      +899
    
   There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What I meant before is: why use a class variable? The issue is that once we deduce […] EDIT: I didn't see your comment, my bad. Then we need to think of another solution because storing them in […] More generally, we should be able to set cached contextual information on TarInfo objects coming from a TarFile. | ||
|  | ||
| def __init__(self, name=""): | ||
| """Construct a TarInfo object. name is the optional name | ||
| of the member. | ||
|  | @@ -1202,6 +1205,13 @@ def _create_gnu_long_header(cls, name, type, encoding, errors): | |
| info["type"] = type | ||
| info["size"] = len(name) | ||
| info["magic"] = GNU_MAGIC | ||
| info["mode"] = 0o100644 | ||
| if cls._name_uid0 is None or cls._name_gid0 is None: | ||
| user_group_names = TarFile._get_user_group_names(0, 0, {}, {}) | ||
| cls._name_uid0 = user_group_names[0] or "" | ||
| cls._name_gid0 = user_group_names[1] or "" | ||
| info["uname"] = cls._name_uid0 | ||
| info["gname"] = cls._name_gid0 | ||
|  | ||
| # create extended header + name blocks. | ||
| return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ | ||
|  | @@ -2202,22 +2212,12 @@ def gettarinfo(self, name=None, arcname=None, fileobj=None): | |
| tarinfo.type = type | ||
| tarinfo.linkname = linkname | ||
|  | ||
| # Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. To | ||
| # speed things up, cache the resolved usernames and group names. | ||
| if pwd: | ||
| if tarinfo.uid not in self._unames: | ||
| try: | ||
| self._unames[tarinfo.uid] = pwd.getpwuid(tarinfo.uid)[0] | ||
| except KeyError: | ||
| self._unames[tarinfo.uid] = '' | ||
| tarinfo.uname = self._unames[tarinfo.uid] | ||
| if grp: | ||
| if tarinfo.gid not in self._gnames: | ||
| try: | ||
| self._gnames[tarinfo.gid] = grp.getgrgid(tarinfo.gid)[0] | ||
| except KeyError: | ||
| self._gnames[tarinfo.gid] = '' | ||
| tarinfo.gname = self._gnames[tarinfo.gid] | ||
| uname, gname = TarFile._get_user_group_names(tarinfo.uid, tarinfo.gid, | ||
| self._unames, self._gnames) | ||
| if uname is not None: | ||
| tarinfo.uname = uname | ||
| if gname is not None: | ||
| tarinfo.gname = gname | ||
|  | ||
| if type in (CHRTYPE, BLKTYPE): | ||
| if hasattr(os, "major") and hasattr(os, "minor"): | ||
|  | @@ -2560,6 +2560,29 @@ def _extract_member(self, tarinfo, targetpath, set_attrs=True, | |
| self.chmod(tarinfo, targetpath) | ||
| self.utime(tarinfo, targetpath) | ||
|  | ||
| def _get_user_group_names(uid, gid, unames_cache, gnames_cache): | ||
| # Calls to pwd.getpwuid() and grp.getgrgid() tend to be expensive. | ||
| # To speed things up, cache the resolved usernames and group names. | ||
| if pwd: | ||
| if uid not in unames_cache: | ||
| try: | ||
| unames_cache[uid] = pwd.getpwuid(uid)[0] | ||
| except KeyError: | ||
| unames_cache[uid] = '' | ||
| uname = unames_cache[uid] | ||
| else: | ||
| uname = None | ||
| if grp: | ||
| if gid not in gnames_cache: | ||
| try: | ||
| gnames_cache[gid] = grp.getgrgid(gid)[0] | ||
| except KeyError: | ||
| gnames_cache[gid] = '' | ||
| gname = gnames_cache[gid] | ||
| else: | ||
| gname = None | ||
| return uname, gname | ||
|  | ||
| #-------------------------------------------------------------------------- | ||
| # Below are the different file methods. They are called via | ||
| # _extract_member() when extract() is called. They can be replaced in a | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| Emit ``mode``, ``uname`` and ``gname`` fields for long paths in | ||
| :mod:`tarfile` archives, providing better bit-for-bit compatibility with GNU | ||
| ``tar(1)``. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will need to be moved to whatsnew/3.15.rst now.