Skip to content

Commit b90f2b9

Browse files
committed
Finally fixed check-urls.py
1 parent 4d464ec commit b90f2b9

File tree

5 files changed

+100
-53
lines changed

5 files changed

+100
-53
lines changed

codegen/Templates/android/README.mustache

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ Scan existing barcodes belonging to 60+ symbologies, including, *Codabar*, *PDF4
1010

1111
## BarCode Processing Features
1212

13-
- [Generate](https://docs.aspose.cloud/barcode/generate-format-and-manipulate-a-barcode-using-cloud-storage/), scan and customize *1D* (linear), *2D* and *postal* barcodes.
13+
- [Generate](https://docs.aspose.cloud/barcode/v4.0/quickstart/#make-an-api-request-from-the-sdk-of-your-choice), scan and customize *1D* (linear), *2D* and *postal* barcodes.
1414
- Generate and recognize barcodes with the checksum option.
1515
- Fetch barcode as an image stream or save the barcode to the local disk.
1616
- Configure barcode height, width, angle quality, margin & resolution.

scripts/check-urls.py

Lines changed: 78 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
import sys
66
import threading
77
import time
8+
import typing
9+
import urllib.parse
810
from queue import SimpleQueue
911

1012
from github_job_summary import JobSummary
@@ -29,58 +31,99 @@ class Curl:
2931

3032

3133
CURL_EXIT_CODES_AND_HTTP_CODES = {
32-
"http://schemas.android.com/aapt": (Curl.COULDNT_RESOLVE_HOST, None),
33-
"http://schemas.android.com/apk/res-auto": (Curl.COULDNT_RESOLVE_HOST, None),
34-
"http://schemas.android.com/apk/res/android": (Curl.COULDNT_RESOLVE_HOST, None),
35-
"http://schemas.android.com/tools": (Curl.COULDNT_RESOLVE_HOST, None),
3634
"https://api.aspose.cloud/connect/token": (Curl.HTTP_RETURNED_ERROR, 400),
3735
"https://api.aspose.cloud/v3.0": (Curl.HTTP_RETURNED_ERROR, 404),
38-
"https://id.aspose.cloud/connect/token": (Curl.HTTP_RETURNED_ERROR, 400),
3936
"https://api.aspose.cloud/v4.0": (Curl.HTTP_RETURNED_ERROR, 404),
37+
"https://api.aspose.cloud/v4.0/": (Curl.HTTP_RETURNED_ERROR, 404),
38+
"https://id.aspose.cloud/connect/token": (Curl.HTTP_RETURNED_ERROR, 400),
4039
"https://barcode.qa.aspose.cloud/v3.0/barcode/swagger/spec": (Curl.COULDNT_RESOLVE_HOST, None),
41-
"https://mvnrepository.com/artifact/io.swagger/swagger-codegen-cli": (Curl.HTTP_RETURNED_ERROR, 403),
42-
"https://www.npmjs.com/package/aspose-barcode-cloud-node": (Curl.HTTP_RETURNED_ERROR, 429),
4340
# TODO: Temporary fix
4441
"https://dashboard.aspose.cloud/applications": (Curl.HTTP_RETURNED_ERROR, 404),
4542
}
4643

4744
URLS_TO_IGNORE: frozenset[str] = frozenset(
4845
[
49-
"http://|https://|ftp://",
50-
"http://localhost:$port/",
51-
"http://localhost:47972",
52-
"http://localhost:47972/connect/token",
53-
"http://localhost:47972/v3.0",
54-
"http://localhost:47972/v3.0/barcode/swagger/spec",
55-
"http://some",
56-
"http://tools.ietf.org/html/rfc1341.html",
57-
"http://tools.ietf.org/html/rfc2046",
58-
"http://tools.ietf.org/html/rfc2388",
59-
"http://urllib3.readthedocs.io/en/latest/advanced-usage.html",
6046
"https://api.aspose.cloud",
61-
"https://api.aspose.cloud/v3.0/barcode/scan",
62-
"https://github.com/aspose-barcode-cloud/aspose-barcode-cloud-dotnet/releases/tag/v{{packageVersion}}",
63-
"https://img.shields.io/badge/api-v{{appVersion}}-lightgrey",
64-
"https://pypi.org/project/{{projectName}}/",
65-
"https://repo1.maven.org/maven2/io/swagger/swagger-codegen-cli/2.4.14/swagger-codegen-cli-2.4.14.jar",
66-
"https://tools.ietf.org/html/rfc1521",
67-
"https://unknown",
6847
"https://www.aspose.cloud/404",
69-
"https://www.mojohaus.org/VERSIONS/RULE/2.1.0",
48+
]
49+
)
50+
51+
IGNORE_DOMAINS: frozenset[str] = frozenset(
52+
[
53+
"central.sonatype.org",
54+
"curl.se",
55+
"dart.dev",
56+
"getcomposer.org",
57+
"go.dev",
58+
"maven.apache.org",
59+
"mvnrepository.com",
60+
"mvnrepository.com",
61+
"nodejs.org",
62+
"packagist.org",
63+
"pkg.go.dev",
64+
"pub.dev",
65+
"pypi.org",
66+
"pypi.python.org",
67+
"repo1.maven.org",
68+
"tools.ietf.org",
69+
"urllib3.readthedocs.io",
70+
"www.apache.org",
71+
"www.dartlang.org",
72+
"www.gradle.org",
73+
"www.mojohaus.org",
74+
"www.npmjs.com",
75+
"www.nuget.org",
76+
"www.opensource.org",
77+
"www.php.net",
78+
"www.python.org",
79+
"www.w3.org",
7080
]
7181
)
7282

7383
URL_END_CHARS = r",#\)\"'<>\*\s\\"
74-
URL_RE_PATTERN = r"(https*://[^%s]+)[%s]?" % (URL_END_CHARS, URL_END_CHARS)
84+
URL_RE_PATTERN = r"(https*://[^{0}]+)[{0}]?".format(URL_END_CHARS)
7585
# print(URL_RE_PATTERN)
7686
URL_REGEX = re.compile(URL_RE_PATTERN, re.MULTILINE)
7787

7888
# URL : [Files]
7989
EXTRACTED_URLS_WITH_FILES: dict[str, list[str]] = {k: [] for k in URLS_TO_IGNORE}
8090

8191

82-
def url_extractor(text, filename):
92+
def valid_url(url: str) -> bool:
93+
try:
94+
parsed: urllib.parse.ParseResult = urllib.parse.urlparse(url)
95+
except:
96+
# Malformed URL
97+
return False
98+
else:
99+
domain = parsed.netloc
100+
if "." not in domain:
101+
# Ignore "localhost" and other domains without .
102+
return False
103+
if domain in IGNORE_DOMAINS:
104+
return False
105+
106+
if (
107+
domain.endswith("android.com")
108+
or domain.endswith(".google.com")
109+
or domain.endswith(".microsoft.com")
110+
or domain.endswith(".wikipedia.org")
111+
):
112+
# Ignore popular domain
113+
return False
114+
115+
if "{{" in url or "}}" in url:
116+
# Ignore templates with {{var}}
117+
return False
118+
119+
return True
120+
121+
122+
def url_extractor(text: str, filename: str) -> typing.Generator[str, None, None]:
83123
for url in URL_REGEX.findall(text):
124+
if not valid_url(url):
125+
# print("Ignore:", url)
126+
continue
84127
if url not in EXTRACTED_URLS_WITH_FILES:
85128
EXTRACTED_URLS_WITH_FILES[url] = [filename]
86129
yield url
@@ -99,7 +142,7 @@ def url_extractor(text, filename):
99142
)
100143

101144

102-
def text_extractor(files):
145+
def text_extractor(files: list[str]) -> typing.Generator[tuple[str, str], None, None]:
103146
for filename in files:
104147
if os.path.splitext(filename)[1] in FILES_TO_IGNORE:
105148
continue
@@ -113,10 +156,12 @@ def text_extractor(files):
113156

114157

115158
class Task:
159+
_proc: subprocess.Popen[bytes]
160+
_stderr: str | None
116161
# To avoid 403 responses
117162
USER_AGENT = "Googlebot/2.1 (+http://www.google.com/bot.html)"
118163

119-
def __init__(self, url):
164+
def __init__(self, url: str):
120165
self.url = url
121166
self._proc = subprocess.Popen(
122167
[
@@ -155,12 +200,12 @@ def age(self) -> float:
155200
return time.time() - self._started
156201

157202

158-
def create_new_task(url) -> Task:
203+
def create_new_task(url: str) -> Task:
159204
# print("Create task:", url)
160205
return Task(url)
161206

162207

163-
def process_finished_task(task) -> None:
208+
def process_finished_task(task: Task) -> None:
164209
# print("Finish task:", task.url)
165210
expected_ret_code, expected_http_code = CURL_EXIT_CODES_AND_HTTP_CODES.get(task.url, (0, None))
166211
if task.ret_code == 0 or task.ret_code == expected_ret_code:
@@ -185,7 +230,7 @@ def process_finished_task(task) -> None:
185230
JOB_SUMMARY.add_error(f"Broken URL '{task.url}': {task.stderr}Files: {EXTRACTED_URLS_WITH_FILES[task.url]}")
186231

187232

188-
WORKER_QUEUE: SimpleQueue = SimpleQueue()
233+
WORKER_QUEUE: SimpleQueue[str | None] = SimpleQueue()
189234

190235

191236
def url_checker(num_workers: int = 8) -> None:

scripts/github_job_summary.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def __init__(self, filename: str):
2121
self._success = []
2222
self._lock = Lock()
2323

24-
def close(self):
24+
def close(self) -> None:
2525
assert not self.__file.closed
2626
self.__file.close()
2727

@@ -31,18 +31,18 @@ def __str__(self) -> str:
3131
lines = ["Errors:"] + self._errors
3232
return "\n\n".join(lines)
3333

34-
def _write_line(self, line):
34+
def _write_line(self, line: str) -> None:
3535
with self._lock:
3636
self.__file.write(line.replace("\r", ""))
3737

3838
@property
3939
def has_errors(self) -> bool:
4040
return bool(self._errors)
4141

42-
def add_header(self, text: str, level: int = 3):
42+
def add_header(self, text: str, level: int = 3) -> None:
4343
self._write_line(f"{'#' * level} {text}\n\n")
4444

45-
def add_error(self, text: str):
45+
def add_error(self, text: str) -> None:
4646
"""
4747
See https://github.com/markdown-templates/markdown-emojis
4848
"""
@@ -51,10 +51,10 @@ def add_error(self, text: str):
5151
self._errors.append(text)
5252
self._write_line(f"\n1. :x: {text}\n")
5353

54-
def add_success(self, text: str):
54+
def add_success(self, text: str) -> None:
5555
self._success.append(text)
5656

57-
def finalize(self, format_str: str):
57+
def finalize(self, format_str: str) -> None:
5858
total = len(self._success) + len(self._errors)
5959
self._write_line(
6060
"\n" + format_str.format(total=total, success=len(self._success), failed=len(self._errors)) + "\n"

scripts/open_ai_code_convertion.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,11 @@
66

77
# Environment Variables for OpenAI API Authentication
88
client = OpenAI(
9-
api_key = os.getenv("OPEN_AI_API_KEY"),
10-
organization = os.getenv("OPEN_AI_ORG_ID"),
11-
project = os.getenv("OPEN_AI_PROJECT_ID")
12-
)
9+
api_key=os.getenv("OPEN_AI_API_KEY"),
10+
organization=os.getenv("OPEN_AI_ORG_ID"),
11+
project=os.getenv("OPEN_AI_PROJECT_ID"),
12+
)
13+
1314

1415
# Helper function to convert file content
1516
def convert_code(content, source_lang, target_lang, target_snippet):
@@ -34,8 +35,8 @@ def convert_code(content, source_lang, target_lang, target_snippet):
3435
model="gpt-4o-mini",
3536
messages=[
3637
{"role": "system", "content": "You are a helpful code conversion assistant."},
37-
{"role": "user", "content": prompt}
38-
]
38+
{"role": "user", "content": prompt},
39+
],
3940
)
4041
print("Getted response")
4142
return response.choices[0].message.content
@@ -57,22 +58,23 @@ def process_files(folder, source_ext, target_ext, source_lang, target_lang, targ
5758
if file.endswith(source_ext):
5859
file_path = Path(root) / file
5960
print(f"Processing file: {file_path}")
60-
61-
with open(file_path, 'r') as f:
61+
62+
with open(file_path, "r") as f:
6263
content = f.read()
63-
64+
6465
converted_content = convert_code(content, source_lang, target_lang, target_snippet)
6566
if converted_content:
6667
matches = re.findall(pattern, converted_content)
6768
if matches:
6869
converted_content = matches[0][0].strip()
6970
target_file_path = file_path.with_suffix(target_ext)
70-
with open(target_file_path, 'w') as f:
71+
with open(target_file_path, "w") as f:
7172
f.write(converted_content)
7273
print(f"Converted file saved as: {target_file_path}")
7374
else:
7475
print(f"Failed to convert file: {file_path}")
7576

77+
7678
if __name__ == "__main__":
7779
parser = argparse.ArgumentParser(description="Convert source code files between programming languages.")
7880
parser.add_argument("folder", type=str, help="Path to the folder containing source files.")
@@ -81,12 +83,12 @@ def process_files(folder, source_ext, target_ext, source_lang, target_lang, targ
8183
parser.add_argument("source_lang", type=str, help="Source language (e.g., JavaScript).")
8284
parser.add_argument("target_lang", type=str, help="Target language (e.g., Python).")
8385
parser.add_argument("target_snippet_file", type=str, help="File containing a code snippet in the target language.")
84-
86+
8587
args = parser.parse_args()
8688

8789
# Read the target snippet from the provided file
8890
try:
89-
with open(args.target_snippet_file, 'r') as snippet_file:
91+
with open(args.target_snippet_file, "r") as snippet_file:
9092
target_snippet = snippet_file.read()
9193
except Exception as e:
9294
print(f"Error reading target snippet file: {e}")

submodules/android

Submodule android updated 1 file

0 commit comments

Comments (0)