diff --git a/poetry.lock b/poetry.lock index e153cd8..40503e3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -41,11 +41,11 @@ pytz = ">=2015.7" [[package]] name = "beautifulsoup4" -version = "4.10.0" +version = "4.11.1" description = "Screen-scraping library" category = "main" -optional = true -python-versions = ">3.0.0" +optional = false +python-versions = ">=3.6.0" [package.dependencies] soupsieve = ">1.2" @@ -337,7 +337,7 @@ name = "soupsieve" version = "2.3.1" description = "A modern CSS selector implementation for Beautiful Soup." category = "main" -optional = true +optional = false python-versions = ">=3.6" [[package]] @@ -502,7 +502,7 @@ docs = ["Sphinx", "furo"] [metadata] lock-version = "1.1" python-versions = ">=3.8" -content-hash = "c6d56c99359f5dce254177f391227d42d07feba1e4e7e7c6d5abd643e683e934" +content-hash = "f0cafd88b060a2f07afa64e64271180f58671d9444c3bb830f36417cacde85aa" [metadata.files] alabaster = [ @@ -522,8 +522,8 @@ babel = [ {file = "Babel-2.9.1.tar.gz", hash = "sha256:bc0c176f9f6a994582230df350aa6e05ba2ebe4b3ac317eab29d9be5d2768da0"}, ] beautifulsoup4 = [ - {file = "beautifulsoup4-4.10.0-py3-none-any.whl", hash = "sha256:9a315ce70049920ea4572a4055bc4bd700c940521d36fc858205ad4fcde149bf"}, - {file = "beautifulsoup4-4.10.0.tar.gz", hash = "sha256:c23ad23c521d818955a4151a67d81580319d4bf548d3d49f4223ae041ff98891"}, + {file = "beautifulsoup4-4.11.1-py3-none-any.whl", hash = "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30"}, + {file = "beautifulsoup4-4.11.1.tar.gz", hash = "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693"}, ] certifi = [ {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, diff --git a/pyproject.toml b/pyproject.toml index 7e8fae6..22ba1e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ xarray = ">=0.20.2" gsw = ">=3.4.0" Sphinx = {version = ">=4.4.0", optional = true} furo = {version 
# NOTE(review): recovered from a whitespace-flattened patch. The patch framing
# that shared this source line is kept verbatim in the two comment lines below.
#   = ">=2022.1.2", optional = true} +beautifulsoup4 = "^4.11.1" [tool.poetry.dev-dependencies] pytest = ">=6.2.5"
#   diff --git a/utils/parse_info_input.py b/utils/parse_info_input.py new file mode 100644 index 0000000..2bb9cde --- /dev/null +++ b/utils/parse_info_input.py @@ -0,0 +1,65 @@
"""Collect the documented input arguments (and their units) of gsw functions.

For every public function found in the ``gsw`` module, the HTML page built
from the ``url`` template is downloaded, the block following its ``INPUT:``
heading is read, and each ``name = description [ unit ]`` line is turned into
a ``(name, unit)`` pair.  Results are printed to stdout.
"""
import sys
from functools import reduce
from types import FunctionType
from urllib.request import urlopen

import gsw
from bs4 import BeautifulSoup, NavigableString, Tag

# Page template; ``{}`` is filled with the gsw function name.
url = "https://www.teos-10.org/pubs/gsw/html/gsw_{}.html"

# (old, new) substitutions applied, in order, to every unit string.
repls = ("deg C", "degC"), ("unitless", "1"), ("degrees of rotation", "arcdeg")


def _normalize_unit(unit):
    """Return *unit* after applying every ``(old, new)`` pair of ``repls``."""
    return reduce(lambda text, pair: text.replace(*pair), repls, unit)


def _first_tag_after_header(soup, title):
    """Return the first tag following an ``<h2>`` whose text equals *title*.

    Siblings that are not tags (plain strings between elements) are skipped.
    If another ``<h2>`` is reached first, that heading has no body and the
    search moves on to the next matching heading.  Returns ``None`` when no
    matching heading has a body.
    """
    for header in soup.find_all("h2"):
        if header.text != title:
            continue
        node = header.next_sibling
        while node is not None:
            if isinstance(node, Tag):
                if node.name == "h2":
                    break
                return node
            node = node.next_sibling
    return None


def parse_info(func_name, url=url):
    """Return the ``(argument name, unit)`` pairs documented for *func_name*.

    Parameters
    ----------
    func_name : str
        Name inserted into the *url* template.
    url : str
        ``str.format`` template of the documentation page.

    Returns
    -------
    list of (str, str)
        One pair per line of the ``INPUT:`` block that contains ``=``; the
        name is the text before the first ``=`` and the unit is the text
        between the first ``[`` and the following ``]``, normalised with
        ``repls``.  Empty when the page has no ``INPUT:`` block.

    Raises
    ------
    IndexError
        If a line containing ``=`` has no ``[``.
    urllib.error.URLError
        If the page cannot be fetched.
    """
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(url.format(func_name)) as response:
        soup = BeautifulSoup(response, features="html.parser")

    block = _first_tag_after_header(soup, "INPUT:")
    if block is None:
        return []

    txt = block.get_text(strip=True).strip()
    parsed = []
    for line in txt.split('\n'):
        if '=' not in line:
            continue
        arg_name = line.split("=")[0].strip()
        unit = line.split("[")[1].split("]")[0].strip()
        parsed.append((arg_name, _normalize_unit(unit)))
    return parsed


def print_dict_attrs():
    """Print the parsed inputs of every public gsw function, then a unit summary.

    A function whose page cannot be fetched or parsed is recorded with an
    empty list and the reason is reported on stderr; stdout keeps the same
    content as before.
    """
    function_names = [
        name
        for name in dir(gsw)
        if isinstance(getattr(gsw, name), FunctionType) and not name.startswith("_")
    ]
    args_all = {}
    for func in function_names:
        try:
            args_all[func] = parse_info(func)
        except Exception as exc:
            # Best effort: keep crawling, but do not hide the failure and do
            # not trap KeyboardInterrupt / SystemExit as a bare except would.
            print(f"{func}: could not parse inputs ({exc!r})", file=sys.stderr)
            args_all[func] = []
    print(args_all)
    print('\n\n********************\n\n')
    get_units_per_arg(args_all)


def get_units_per_arg(args_all):
    """Group the distinct units seen for each argument name and print them.

    Parameters
    ----------
    args_all : dict
        Maps a function name to a sequence of entries whose first item is an
        argument name and whose second item is its unit.

    Returns
    -------
    dict
        Maps each argument name to the list of distinct units, in order of
        first appearance.  The same dict is printed to stdout.
    """
    units = {}
    for func, args in args_all.items():
        for entry in args:
            arg_name, unit = entry[0], entry[1]
            # NOTE(review): looks like a leftover debugging aid for the
            # argument named 'h'; kept so that stdout is unchanged.
            if arg_name == 'h':
                print(func, entry)
            known = units.setdefault(arg_name, [])
            if unit not in known:
                known.append(unit)
    print(units)
    return units


if __name__ == '__main__':
    print_dict_attrs()

# Patch framing for the next file (continues on the following source line):
#   diff --git a/utils/parse_info_output.py b/utils/parse_info_output.py new file mode 100644
# NOTE(review): recovered from a whitespace-flattened patch. The patch framing
# that shared this source line is kept verbatim in the comment line below.
#   index 0000000..c4f21e5 --- /dev/null +++ b/utils/parse_info_output.py @@ -0,0 +1,52 @@
"""Collect the documented output name and unit of gsw functions.

For every public function found in the ``gsw`` module, the HTML page built
from the ``url`` template is downloaded and the block following its
``OUTPUT:`` heading is parsed into a ``(name, unit)`` pair.  Results are
printed to stdout.
"""
import sys
from functools import reduce
from types import FunctionType
from urllib.request import urlopen

import gsw
from bs4 import BeautifulSoup, NavigableString, Tag

# Page template; ``{}`` is filled with the gsw function name.
url = "https://www.teos-10.org/pubs/gsw/html/gsw_{}.html"

# (old, new) substitutions applied, in order, to every unit string.
repls = ("deg C", "degC"), ("unitless", "1"), ("degrees of rotation", "arcdeg")


def _normalize_unit(unit):
    """Return *unit* after applying every ``(old, new)`` pair of ``repls``."""
    return reduce(lambda text, pair: text.replace(*pair), repls, unit)


def _first_tag_after_header(soup, title):
    """Return the first tag following an ``<h2>`` whose text equals *title*.

    Siblings that are not tags (plain strings between elements) are skipped.
    If another ``<h2>`` is reached first, that heading has no body and the
    search moves on to the next matching heading.  Returns ``None`` when no
    matching heading has a body.
    """
    for header in soup.find_all("h2"):
        if header.text != title:
            continue
        node = header.next_sibling
        while node is not None:
            if isinstance(node, Tag):
                if node.name == "h2":
                    break
                return node
            node = node.next_sibling
    return None


def parse_info(func_name, url=url):
    """Return the ``(name, unit)`` pair documented as output of *func_name*.

    Parameters
    ----------
    func_name : str
        Name inserted into the *url* template.
    url : str
        ``str.format`` template of the documentation page.

    Returns
    -------
    tuple of (str, str) or None
        The name is the text before the first ``=`` of the ``OUTPUT:`` block,
        cut at its first ``_``; the unit is the text between the first ``[``
        and the following ``]``, normalised with ``repls``.  ``None`` when
        the page has no ``OUTPUT:`` block.

    Raises
    ------
    IndexError
        If the ``OUTPUT:`` block contains no ``[``.
    urllib.error.URLError
        If the page cannot be fetched.
    """
    # The context manager closes the HTTP response once the page is parsed.
    with urlopen(url.format(func_name)) as response:
        soup = BeautifulSoup(response, features="html.parser")

    block = _first_tag_after_header(soup, "OUTPUT:")
    if block is None:
        return None

    txt = block.get_text(strip=True).strip()
    name = txt.split("=")[0].strip().split("_")[0]
    unit = txt.split("[")[1].split("]")[0].strip()
    return (name, _normalize_unit(unit))


def print_dict_attrs():
    """Print the output units and output names of every public gsw function.

    A function whose page cannot be fetched or parsed, or that has no
    ``OUTPUT:`` block, is recorded with an empty name and unit and the reason
    is reported on stderr; stdout keeps the same content as before.
    """
    function_names = [
        name
        for name in dir(gsw)
        if isinstance(getattr(gsw, name), FunctionType) and not name.startswith("_")
    ]
    attrs = {}
    names = {}
    for func in function_names:
        try:
            parsed = parse_info(func)
        except Exception as exc:
            # Best effort: keep crawling, but do not hide the failure and do
            # not trap KeyboardInterrupt / SystemExit as a bare except would.
            print(f"{func}: could not parse output ({exc!r})", file=sys.stderr)
            parsed = None
        else:
            if parsed is None:
                print(f"{func}: no OUTPUT: section found", file=sys.stderr)
        name, unit = ("", "") if parsed is None else parsed
        names[func] = name
        attrs[func] = {"units": unit}
    print(attrs)
    print(names)


if __name__ == '__main__':
    print_dict_attrs()