|
| 1 | +#!/usr/bin/python3 |
| 2 | +import requests |
| 3 | +from lxml import etree |
| 4 | +import json |
| 5 | +import sys |
# Search endpoints of the supported distributions' package sites.
_DISTRO_SEARCH_URLS = {
    "debian": "https://packages.debian.org/search",
    "ubuntu": "https://packages.ubuntu.com/search",
}

# Architectures for which a .deb file name is reported.
_TRACKED_ARCHITECTURES = ("arm64", "armhf", "amd64", "riscv64")

# Upper bound (seconds) on the upstream request so a stalled server cannot
# hang the script forever.
_REQUEST_TIMEOUT_SECONDS = 30


def _heading_names_package(heading, pacakge_name):
    """Return True when the heading's last word is exactly ``pacakge_name``."""
    words = (heading.text or "").split()
    return bool(words) and words[-1] == pacakge_name


def _deb_files_by_arch(pacakge_name, version_line):
    """Build ``{arch: file name}`` from a ``"<version>: <arch> <arch> ..."`` line.

    Returns ``None`` when the line has no ``:`` separating the version from
    the architecture list.
    """
    # Split on the LAST colon: a version may itself contain one when it
    # carries an epoch ("2:9.0-1: amd64 arm64").
    version, separator, arch_text = version_line.strip().rpartition(":")
    if not separator:
        return None
    version = version.strip()
    # Archive .deb file names conventionally omit the epoch, so drop it.
    _epoch, has_epoch, without_epoch = version.partition(":")
    if has_epoch:
        version = without_epoch
    # Compare whole tokens so "amd64" does not match "kfreebsd-amd64".
    listed_archs = set(arch_text.split())
    return {
        arch: pacakge_name + "_" + version + "_" + arch + ".deb"
        for arch in _TRACKED_ARCHITECTURES
        if arch in listed_archs
    }


def get_package_info_from_upstream(distro, pacakge_name):
    """Look up the .deb file names of a package on a distro's package site.

    The distro's package-search page is fetched and the first ``<ul>``
    following an ``<h3>`` heading is read. Each ``<li>`` in that list
    contributes one entry, keyed by the ``<li>``'s ``class`` attribute
    (presumably the release/suite name -- confirm against the live page).

    Args:
        distro: ``"debian"`` or ``"ubuntu"``.
        pacakge_name: Name of the package to search for (the parameter
            spelling is kept for backward compatibility).

    Returns:
        A dict mapping each ``<li>`` class to a dict of
        ``{architecture: "<name>_<version>_<architecture>.deb"}`` for the
        tracked architectures listed for that entry. An empty dict is
        returned when the request is answered with an error status, the
        body is empty, or no result list is found.

    Raises:
        SystemExit: With code 1 if ``distro`` is not supported.
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout, ...).
    """
    search_url = _DISTRO_SEARCH_URLS.get(distro)
    if search_url is None:
        print("invalid distro %s, quit" % distro)
        sys.exit(1)

    # Step 1: fetch the search page. Passing the query as ``params`` lets
    # requests URL-encode the package name and the fixed arguments.
    response = requests.get(
        search_url,
        params={
            "keywords": pacakge_name,
            "searchon": "names",
            "suite": "all",
            "section": "all",
        },
        timeout=_REQUEST_TIMEOUT_SECONDS,
    )
    if not response.ok or not response.content:
        return {}

    # Step 2: parse the HTML (bytes, so lxml can detect the encoding itself).
    tree = etree.fromstring(response.content, etree.HTMLParser())
    if tree is None:
        return {}

    # Step 3: collect every heading that is followed by a result list.
    sections = []
    for heading in tree.xpath('//h3'):
        following_lists = heading.xpath('./following-sibling::ul[1]')
        if following_lists:
            sections.append((heading, following_lists[0]))
    if not sections:
        return {}

    # A name search can list several packages. Prefer the section whose
    # heading names the requested package; otherwise use the first one.
    result_list = next(
        (
            candidate
            for heading, candidate in sections
            if _heading_names_package(heading, pacakge_name)
        ),
        sections[0][1],
    )

    # Step 4: one entry per <li>; the second text node outside <a> elements
    # is expected to hold "<version>: <architectures>".
    all_package_info = {}
    for item in result_list.xpath('.//li'):
        text_nodes = item.xpath('.//text()[not(parent::a)]')
        if len(text_nodes) < 2:
            continue  # no version line in this entry
        files_by_arch = _deb_files_by_arch(pacakge_name, text_nodes[1])
        if files_by_arch is None:
            continue  # unexpected layout; skip instead of crashing
        all_package_info[item.get("class")] = files_by_arch
    return all_package_info
def main(argv=None):
    """Write the combined Debian and Ubuntu package info to ``<package>.json``.

    Args:
        argv: Command-line argument list including the program name at
            index 0. Defaults to ``sys.argv``.

    Raises:
        SystemExit: With code 1 when the package name argument is missing
            or when either upstream lookup returns no information.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: python parse.py <pacakge_name>")
        sys.exit(1)

    package_name = argv[1]
    debian_info = get_package_info_from_upstream("debian", package_name)
    ubuntu_info = get_package_info_from_upstream("ubuntu", package_name)

    # Both lookups must succeed; a partial result is treated as a failure.
    if not (debian_info and ubuntu_info):
        print("failed to get package info")
        sys.exit(1)

    # On a key collision the Ubuntu entry wins, as it is unpacked last.
    all_info_result = {**debian_info, **ubuntu_info}
    json_file_name = package_name + ".json"
    with open(json_file_name, "w", encoding="utf-8") as outfile:
        json.dump(all_info_result, outfile)


# Run only when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
0 commit comments