Skip to content

Commit 322a864

Browse files
authored
Merge pull request #23 from jeffkala/develop
bump version
2 parents c0d2d7a + de43118 commit 322a864

23 files changed

+406
-243
lines changed

.github/workflows/basics.yml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
---
2+
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
3+
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
4+
5+
name: "Python package"
6+
7+
on: ["push"] # yamllint disable-line rule:truthy
8+
9+
jobs:
10+
build:
11+
12+
runs-on: "ubuntu-latest"
13+
strategy:
14+
fail-fast: false
15+
matrix:
16+
python-version: [3.6, 3.7, 3.8, 3.9]
17+
18+
steps:
19+
- uses: "actions/checkout@v2"
20+
- name: "Set up Python ${{ matrix.python-version }}"
21+
uses: "actions/setup-python@v2"
22+
with:
23+
python-version: "${{ matrix.python-version }}"
24+
- name: "Install dependencies"
25+
run: |
26+
python -m pip install --upgrade pip
27+
python -m pip install toml invoke poetry black
28+
invoke build --no-cache
29+
- name: "Run All Static Code Analysis and Tests"
30+
run: |
31+
invoke tests

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
## Changelog
2+
3+
## v0.1.2 - 2021-12-02
4+
5+
### Added
6+
7+
- #21 Add redirect support and initial logging
8+
9+
## v0.1.1 - 2021-07-17
10+
11+
Add linting, contributing, static analysis
12+
113
## v0.1.0 - 2021-07-15
214

315
Initial release

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ Alternatively,
2525

2626
you can replace `python cli.py` with `pyurlcheck` on the command line.
2727

28+
```
29+
▶ pyurlcheck pyurlcheck/examples/
30+
pyurlcheck/examples/example3.txt:4 URL Issue: https://www.ansible.com/jeff
31+
pyurlcheck/examples/example2.md:7 URL Issue: https://www.ansible.com/jeff
32+
pyurlcheck/examples/example3.md:3 URL Issue: https://www.ansible.com/jeff
33+
pyurlcheck/examples/example4.rst:22 URL Issue: http://google.com/france
34+
pyurlcheck/examples/example4.rst:23 URL Issue: http://google.com/japan
35+
pyurlcheck/examples/example1.md:9 URL Issue: https://www.ansible.com/jeff
36+
```
37+
38+
File extensions are currently not checked; therefore all files in a directory that is passed in will be validated.
39+
2840
## Installation
2941

3042
```

poetry.lock

Lines changed: 20 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "pyurlcheck"
3-
version = "0.1.1"
3+
version = "0.1.2"
44
description = "Search docs and validate URLs found are working properly."
55
authors = ["Jeff Kala <jeff.l.kala@gmail.com>"]
66
readme = "README.md"
@@ -18,6 +18,7 @@ pyurlcheck = "pyurlcheck.cli:main"
1818
python = "^3.6"
1919
click = "^7.1.2"
2020
requests = "^2.25.1"
21+
dnspython = "^2.1.0"
2122

2223
[tool.poetry.dev-dependencies]
2324
pytest = "*"

pyurlcheck/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""Pyurlcheck."""
22

3-
__version__ = "0.1.1"
3+
__version__ = "0.1.2"

pyurlcheck/check.py

Lines changed: 33 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,45 @@
11
"""Check Whether URL is Public or Private."""
2-
import socket
2+
import logging
33
import ipaddress
44
from urllib.parse import urlparse
5+
from dns import resolver
56

67

7-
def _is_private(ipaddr):
8-
"""Check if Private RFC1918."""
9-
return ipaddress.ip_address(ipaddr).is_private
8+
def split_url(url):
9+
"""Parses the URL into usable parts.
1010
11+
Args:
12+
url (str): Exact URL found from the regex.
1113
12-
def _get_ip(url):
13-
"""Take a URL and execute a name lookup to retreive IP Address Object."""
14-
return socket.gethostbyname(url)
14+
Returns:
15+
ParseResult: Includes scheme, netloc, path, params, query, fragment.
16+
"""
17+
url_split = urlparse(url)
18+
logging.debug("URL Split: %s", url_split)
19+
return url_split
1520

1621

17-
class CheckUrl:
18-
"""Check URL class will be used for checks that need to be done on a URL."""
22+
def get_ip(domain):
23+
"""Take a domain name and execute a name lookup to retrieve its IP address.
1924
20-
def __init__(self, url):
21-
"""Initialize URL Check."""
22-
self.url = url
25+
Args:
26+
domain (str): URL from netloc in parse.
2327
24-
def split_url(self):
25-
"""Take Full FQDN URL and split it into usable parts."""
26-
url_split = urlparse(self.url)
27-
return url_split
28+
Returns:
29+
str: IP Address
30+
"""
31+
result = resolver.resolve(domain, "A")
32+
logging.debug("DNS Resolver: %s", result[0])
33+
return result[0].address
2834

29-
def is_private(self):
30-
"""Take a URL manipulate it and check if it's Private RFC1918."""
31-
split_url = self.split_url()
32-
ip_addr = _get_ip(split_url)
33-
return _is_private(ip_addr)
35+
36+
def is_private(ip_addr):
37+
"""Check whether an IP address is in private (RFC 1918) address space.
38+
39+
Args:
40+
ip_addr (str): IP address from the DNS resolution.
41+
42+
Returns:
43+
Boolean: True/False whether address is RFC1918 space.
44+
"""
45+
return ipaddress.ip_address(ip_addr).is_private

pyurlcheck/cli.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,50 @@
11
"""Example cli using click."""
2+
import logging
23
import sys
34

45
import click
56

6-
from pyurlcheck.check import CheckUrl
7+
from pyurlcheck.check import split_url, get_ip, is_private
78
from pyurlcheck.find import FindUrls
89
from pyurlcheck.validate import ValidateUrl
910

1011

12+
LOGGER = logging.getLogger(__name__)
13+
14+
1115
@click.command()
1216
@click.argument("input_data", type=click.Path(exists=True, file_okay=True, dir_okay=True), required=True)
13-
def main(input_data):
17+
@click.option("--log_level", type=click.Choice(["INFO", "DEBUG"], case_sensitive=False), default="INFO")
18+
def main(input_data, log_level):
1419
"""Entry point into the pyurlcheck command line tool.
1520
1621
Args:
1722
input_data (str): Either filename or directory to search for URLs.
23+
log_level (str): Logging level to execute with. Choices ['INFO', 'DEBUG']. Default is INFO.
1824
"""
25+
logging.basicConfig(format="%(asctime)s %(message)s", stream=sys.stdout, level=getattr(logging, log_level))
1926
results = []
2027
files_urls = FindUrls(input_data).find_urls()
2128
for file_name, url_list in files_urls.items():
2229
for line_num, urls in url_list.items():
2330
for url in urls:
24-
url_details = CheckUrl(url).split_url()
25-
if url_details.scheme == "":
26-
is_valid = ValidateUrl(url, need_scheme=True).validate()
27-
# print(f"URL is {url}")
28-
# print(f"Is RFC1918: {is_private}")
29-
else:
30-
is_valid = ValidateUrl(url).validate()
31-
if not is_valid:
32-
results.append(f"{file_name}:{line_num + 1}\tURL Issue: {url}")
31+
url_details = split_url(url)
32+
# RFC 1918 Check, if True don't validate.
33+
if not is_private(get_ip(url_details.netloc)):
34+
if url_details.scheme == "":
35+
is_valid, has_redirects = ValidateUrl(url, need_scheme=True).validate()
36+
else:
37+
is_valid, has_redirects = ValidateUrl(url).validate()
38+
if not is_valid:
39+
results.append(f"{file_name}:{line_num + 1}\tURL Issue: {url}")
40+
if has_redirects:
41+
LOGGER.info(
42+
"Redirect_Warning: %s had redirects while executing. Redirects are ' => '.join(%s)!",
43+
url,
44+
has_redirects,
45+
)
3346
if len(results) > 0:
34-
print("\n".join(results))
47+
LOGGER.info("\n".join(results))
3548
sys.exit(len(results))
3649
else:
3750
sys.exit(0)

pyurlcheck/constants.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
"""Define Constants Until Config Options are Created."""
2+
3+
IGNORE_DIRS = [
4+
"__pycache__",
5+
"img",
6+
"images",
7+
]

pyurlcheck/examples/example3.md

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)