| 
 | 1 | +import os  | 
 | 2 | +import sys  | 
 | 3 | +import re  | 
 | 4 | +import itertools  | 
 | 5 | +import requests  | 
 | 6 | +import hashlib  | 
 | 7 | + | 
 | 8 | +from urllib.parse import quote  | 
 | 9 | +from pathlib import Path  | 
 | 10 | +from github import Github  | 
 | 11 | +from typing import List, Dict, Set  | 
 | 12 | + | 
 | 13 | +# Define yanked versions - modify this dictionary as needed  | 
 | 14 | +yanked_versions = {  | 
 | 15 | +         "confluent-kafka": {  | 
 | 16 | +             "2.11.0+gr",  | 
 | 17 | +             "2.11.0+gr.1",  | 
 | 18 | +         },  | 
 | 19 | +    }  | 
 | 20 | + | 
# Page skeleton shared by the top-level index and every per-package index.
# Filled in with str.format(): {package_name} is used for both the <title> and
# the <h1> heading, {package_links} is the pre-rendered block of <a> elements.
HTML_TEMPLATE = """<!DOCTYPE html>
 <html>
 <head>
     <title>{package_name}</title>
 </head>
 <body>
     <h1>{package_name}</h1>
     {package_links}
 </body>
 </html>
"""
 | 32 | + | 
 | 33 | +def normalize(name):  | 
 | 34 | +    """Normalize package name according to PEP 503."""  | 
 | 35 | +    return re.sub(r"[-_.]+", "-", name).lower()  | 
 | 36 | + | 
 | 37 | +def calculate_sha256(file_path):  | 
 | 38 | +    with open(file_path, "rb") as f:  | 
 | 39 | +        digest = hashlib.file_digest(f, "sha256")  | 
 | 40 | + | 
 | 41 | +    return digest.hexdigest()  | 
 | 42 | + | 
 | 43 | +def extract_version_from_filename(filename: str) -> str:  | 
 | 44 | +    """Extract version from wheel or sdist filename."""  | 
 | 45 | +    # Remove extension  | 
 | 46 | +    name = filename.replace('.tar.gz', '').replace('.whl', '')  | 
 | 47 | +      | 
 | 48 | +    # For wheels: package-version-python-abi-platform  | 
 | 49 | +    # For sdist: package-version  | 
 | 50 | +    parts = name.split('-')  | 
 | 51 | +    if len(parts) >= 2:  | 
 | 52 | +        return parts[1]  | 
 | 53 | +    return ""  | 
 | 54 | + | 
 | 55 | +class PackageIndexBuilder:  | 
 | 56 | +    def __init__(self, token: str, repo_name: str, output_dir: str, yanked_versions: Dict[str, Set[str]] = None):  | 
 | 57 | +        self.github = Github(token)  | 
 | 58 | +        self.repo = self.github.get_repo(repo_name)  | 
 | 59 | +        self.output_dir = Path(output_dir)  | 
 | 60 | +        self.packages: Dict[str, List[Dict]] = {}  | 
 | 61 | +        self.yanked_versions = yanked_versions or {}  | 
 | 62 | +          | 
 | 63 | +        # Set up authenticated session  | 
 | 64 | +        self.session = requests.Session()  | 
 | 65 | +        self.session.headers.update({  | 
 | 66 | +            "Authorization": f"token {token}",  | 
 | 67 | +            "Accept": "application/octet-stream",  | 
 | 68 | +        })  | 
 | 69 | + | 
 | 70 | +    def is_version_yanked(self, package_name: str, version: str) -> bool:  | 
 | 71 | +        """Check if a specific version of a package is yanked."""  | 
 | 72 | +        normalized_package = normalize(package_name)  | 
 | 73 | +        return normalized_package in self.yanked_versions and version in self.yanked_versions[normalized_package]  | 
 | 74 | + | 
 | 75 | +    def collect_packages(self):  | 
 | 76 | +        print("Query release assets")  | 
 | 77 | +          | 
 | 78 | +        for release in self.repo.get_releases():  | 
 | 79 | +            for asset in release.get_assets():  | 
 | 80 | +                if asset.name.endswith(('.whl', '.tar.gz')):  | 
 | 81 | +                    package_name = normalize(asset.name.split('-')[0])  | 
 | 82 | +                    if package_name not in self.packages:  | 
 | 83 | +                        self.packages[package_name] = []  | 
 | 84 | + | 
 | 85 | +                    version = extract_version_from_filename(asset.name)  | 
 | 86 | +                    self.packages[package_name].append({  | 
 | 87 | +                        'filename': asset.name,  | 
 | 88 | +                        'url': asset.url,  | 
 | 89 | +                        'size': asset.size,  | 
 | 90 | +                        'upload_time': asset.created_at.strftime('%Y-%m-%d %H:%M:%S'),  | 
 | 91 | +                        'version': version,  | 
 | 92 | +                    })  | 
 | 93 | + | 
 | 94 | +    def generate_index_html(self):  | 
 | 95 | +        # Generate main index  | 
 | 96 | +        package_list = self.packages.keys()  | 
 | 97 | +        main_index = HTML_TEMPLATE.format(  | 
 | 98 | +            package_name="Simple Package Index",  | 
 | 99 | +            package_links="\n".join([f'<a href="{x}/">{x}</a><br/>' for x in package_list])  | 
 | 100 | +        )  | 
 | 101 | + | 
 | 102 | +        with open(self.output_dir / "index.html", "w") as f:  | 
 | 103 | +            f.write(main_index)  | 
 | 104 | +   | 
 | 105 | +        for package, assets in self.packages.items():  | 
 | 106 | + | 
 | 107 | +            package_dir = self.output_dir / package  | 
 | 108 | +            package_dir.mkdir(exist_ok=True)  | 
 | 109 | + | 
 | 110 | +            # Generate package-specific index.html  | 
 | 111 | +            file_links = []  | 
 | 112 | +            assets = sorted(assets, key=lambda x: x["filename"])  | 
 | 113 | +            for filename, items in itertools.groupby(assets, key=lambda x: x["filename"]):  | 
 | 114 | +                asset_info = next(items)  | 
 | 115 | +                url = asset_info['url']  | 
 | 116 | +                version = asset_info['version']  | 
 | 117 | + | 
 | 118 | +                # Download the file  | 
 | 119 | +                with open(package_dir / filename, 'wb') as f:  | 
 | 120 | +                    print (f"Downloading '{filename}' from '{url}'")  | 
 | 121 | +                    response = self.session.get(url, stream=True)  | 
 | 122 | +                    response.raise_for_status()  | 
 | 123 | +                    for chunk in response.iter_content(chunk_size=8192):  | 
 | 124 | +                        if chunk:  | 
 | 125 | +                            f.write(chunk)  | 
 | 126 | + | 
 | 127 | +                sha256_hash = calculate_sha256(package_dir / filename)  | 
 | 128 | + | 
 | 129 | +                # Check if this version is yanked  | 
 | 130 | +                yanked_attr = ""  | 
 | 131 | +                if self.is_version_yanked(package, version):  | 
 | 132 | +                    yanked_attr = ' data-yanked="true"'  | 
 | 133 | + | 
 | 134 | +                file_links.append(  | 
 | 135 | +                    f'<a href="{quote(filename)}#sha256={sha256_hash}"{yanked_attr}>{filename}</a><br/>'  | 
 | 136 | +                )  | 
 | 137 | + | 
 | 138 | +            package_index = HTML_TEMPLATE.format(  | 
 | 139 | +                package_name=f"Links for {package}",  | 
 | 140 | +                package_links="\n".join(file_links)  | 
 | 141 | +            )  | 
 | 142 | + | 
 | 143 | +            with open(package_dir / "index.html", "w") as f:  | 
 | 144 | +                f.write(package_index)  | 
 | 145 | + | 
 | 146 | +    def build(self):  | 
 | 147 | +        # Create output directory  | 
 | 148 | +        self.output_dir.mkdir(parents=True, exist_ok=True)  | 
 | 149 | + | 
 | 150 | +        # Collect and generate  | 
 | 151 | +        self.collect_packages()  | 
 | 152 | +        self.generate_index_html()  | 
 | 153 | + | 
 | 154 | + | 
 | 155 | +def main():  | 
 | 156 | +    # Get environment variables  | 
 | 157 | +    token = os.environ.get("GITHUB_TOKEN")  | 
 | 158 | +    repo = os.environ.get("GITHUB_REPOSITORY")  | 
 | 159 | +    print (repo)  | 
 | 160 | +    output_dir = os.environ.get("OUTPUT_DIR", "dist")  | 
 | 161 | +      | 
 | 162 | +    if not all([token, repo]):  | 
 | 163 | +        print ("Missing required environment variables")  | 
 | 164 | +        sys.exit(1)  | 
 | 165 | + | 
 | 166 | +    builder = PackageIndexBuilder(token, repo, output_dir, yanked_versions)  | 
 | 167 | +    builder.build()  | 
 | 168 | + | 
 | 169 | +if __name__ == "__main__":  | 
 | 170 | +    main()  | 
0 commit comments