Skip to content

Commit 417aa4d

Browse files
committed
Applied uvx ruff format .
1 parent 85780f3 commit 417aa4d

File tree

1 file changed

+62
-44
lines changed

1 file changed

+62
-44
lines changed

python/extract_har.py

Lines changed: 62 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -12,110 +12,128 @@
1212
import mimetypes
1313
from urllib.parse import urlparse
1414

15+
1516
def get_extension_for_mimetype(mimetype):
    """Get the most common file extension for a given MIME type.

    Args:
        mimetype: A MIME type string such as ``"text/html"``. Parameters
            after a ``;`` (for example ``"; charset=utf-8"``) are ignored,
            and matching is case-insensitive. ``None`` or an empty string
            is treated as an unknown type.

    Returns:
        The extension including its leading dot (e.g. ``".json"``), or
        ``".bin"`` when the type is not recognised.
    """
    # A Content-Type style value can carry parameters; neither
    # ``mimetypes.guess_extension`` nor the fallback table below would match
    # "application/json; charset=utf-8", so reduce it to the bare type first.
    bare_type = (mimetype or "").split(";", 1)[0].strip().lower()
    if not bare_type:
        return ".bin"

    ext = mimetypes.guess_extension(bare_type)
    if ext:
        return ext

    # Fallback mappings for common types the platform database may lack.
    fallbacks = {
        "application/json": ".json",
        "image/svg+xml": ".svg",
        "text/html": ".html",
        "text/css": ".css",
        "application/javascript": ".js",
    }
    return fallbacks.get(bare_type, ".bin")
31+
3032

3133
def extract_path_from_url(url):
    """Convert a URL into a relative filesystem path, preserving the path structure.

    Only the path component of the URL is used; scheme, host, query string
    and fragment are discarded.

    Args:
        url: The URL to convert.

    Returns:
        A ``/``-separated relative path with no leading or trailing slash.
        ``"index"`` is returned when the URL has no usable path segments.
    """
    parsed = urlparse(url)

    # Drop empty segments (leading, trailing or doubled slashes) together with
    # "." and "..": the result is joined onto an output directory by the
    # caller, and ".." segments would let a crafted URL climb out of it.
    segments = [
        segment
        for segment in parsed.path.split("/")
        if segment not in ("", ".", "..")
    ]

    # Handle empty paths such as "https://example.com/".
    if not segments:
        return "index"

    return "/".join(segments)
4446

47+
4548
@click.command()
@click.argument("harzip", type=click.Path(exists=True))
@click.argument("mimetypes", nargs=-1, required=True)
@click.option(
    "-o",
    "--output",
    type=click.Path(),
    default=".",
    help="Output directory for extracted files",
)
@click.option(
    "--paths",
    is_flag=True,
    help="Use URL paths for filenames instead of original names",
)
@click.option(
    "--pretty-json",
    is_flag=True,
    help="Pretty print JSON files with 2-space indentation",
)
def extract_har(harzip, mimetypes, output, paths, pretty_json):
    """Extract files of specified MIME types from a HAR archive."""
    # The docstring above is shown by click as the --help text, so the
    # detailed notes live in comments instead.
    #
    # harzip:      path to a zip archive containing "har.har" plus the
    #              response bodies it references through "_file" entries.
    # mimetypes:   tuple of MIME type strings; an entry is extracted only when
    #              its "mimeType" equals one of them exactly. The name is fixed
    #              by the click argument declaration and shadows the
    #              ``mimetypes`` module inside this function only.
    # output:      directory that receives the files (created if missing).
    # paths:       name files after the request URL path instead of "_file".
    # pretty_json: re-indent "application/json" bodies before writing.
    output_dir = Path(output)
    output_dir.mkdir(parents=True, exist_ok=True)
    # Resolved once so every destination can be checked against it below.
    output_root = output_dir.resolve()

    with zipfile.ZipFile(harzip) as zf:
        # Read the HAR JSON file
        try:
            har_content = json.loads(zf.read("har.har"))
        except KeyError:
            click.echo("Error: har.har not found in archive", err=True)
            return
        except json.JSONDecodeError:
            click.echo("Error: Invalid JSON in har.har", err=True)
            return

        # Process each entry
        for entry in har_content.get("log", {}).get("entries", []):
            # ``or {}`` also covers keys that are present but null.
            response = entry.get("response") or {}
            content = response.get("content") or {}
            mime_type = content.get("mimeType")

            # Check if this entry matches our MIME type filter
            if mime_type not in mimetypes:
                continue

            # Get the file reference
            file_ref = content.get("_file")
            if not file_ref:
                continue

            # Only the archive lookup can legitimately raise KeyError, so keep
            # the try body to that single call.
            try:
                file_content = zf.read(file_ref)
            except KeyError:
                click.echo(f"Warning: File {file_ref} not found in archive", err=True)
                continue

            if paths:
                # Use URL path for filename
                request_url = (entry.get("request") or {}).get("url", "")
                path = extract_path_from_url(request_url)
                # Add appropriate extension if not present
                if not Path(path).suffix:
                    path += get_extension_for_mimetype(mime_type)
                outpath = output_dir / path
            else:
                # Use original filename
                outpath = output_dir / file_ref

            # The HAR content is untrusted: an absolute name or ".." segments
            # would otherwise write outside the requested output directory.
            if output_root not in outpath.resolve().parents:
                click.echo(
                    f"Warning: Skipping {file_ref} - unsafe output path {outpath}",
                    err=True,
                )
                continue

            # Ensure parent directories exist
            outpath.parent.mkdir(parents=True, exist_ok=True)

            # Handle JSON pretty printing if requested; on failure the
            # original bytes are written unchanged.
            if pretty_json and mime_type == "application/json":
                try:
                    json_data = json.loads(file_content)
                    file_content = json.dumps(json_data, indent=2).encode("utf-8")
                except json.JSONDecodeError:
                    click.echo(
                        f"Warning: Could not pretty print {outpath} - invalid JSON",
                        err=True,
                    )

            # Write the file
            outpath.write_bytes(file_content)
            click.echo(f"Extracted: {outpath}")
119136

120-
if __name__ == "__main__":
    # Invoke the click command only when run as a script, not on import.
    extract_har()

0 commit comments

Comments
 (0)