Skip to content

Commit 5554925

Browse files
committed
add_hostname_column
1 parent b5cccec commit 5554925

File tree

4 files changed

+38
-4
lines changed

4 files changed

+38
-4
lines changed

library/playback/media_printer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def media_printer(args, data, units: str | None = "media", media_len=None) -> No
102102
print_args = getattr(args, "print", "")
103103
cols = getattr(args, "cols", [])
104104
m_columns = db_utils.columns(args, "media")
105+
print(action)
105106

106107
if (args.limit or args.timeout_size) and "path" in data[0].keys():
107108
new_data = []

library/utils/path_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,9 @@ def folder_utime(folder_path, times: tuple[int, int] | tuple[float, float]):
230230

231231
def domain_from_url(tracker):
    """Return the lower-cased host of *tracker*, or ``""`` when none can be extracted.

    The host is taken from ``urlparse(tracker).hostname``, which excludes any
    ``user:password@`` prefix, ``:port`` suffix and IPv6 brackets. When the
    parser reports no hostname, the raw ``netloc`` up to the first ``:`` is
    used instead.

    Args:
        tracker: URL string. Falsy values (``None``, ``""``) are accepted.

    Returns:
        The lower-cased host name, or an empty string for falsy input, input
        without a network location, or input that ``urlparse`` rejects.
    """
    # urlparse(None) yields bytes fields, which would make the str-based
    # fallback below raise TypeError; there is no host to report anyway.
    if not tracker:
        return ""

    try:
        url = urlparse(tracker)
    except ValueError:
        # urlparse raises ValueError for malformed bracketed hosts such as
        # "http://[::1/path". Returning "" keeps one bad value from aborting
        # a bulk caller (e.g. an SQL statement invoking this per row).
        return ""

    if url.hostname:
        return url.hostname.lower()
    return url.netloc.rsplit(":")[0].lower()
235236

236237

237238
def tld_from_url(url):

library/utils/sqlgroups.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,21 @@
66
perf_randomize_using_ids = filter_engine.perf_randomize_using_ids
77

88

9+
def add_hostname_column(args):
    """Ensure ``media.hostname`` exists, is backfilled and indexed; return the media columns.

    Steps performed against ``args.db``:

    1. Register ``domain_from_url`` as a deterministic SQL function.
    2. Add a ``hostname TEXT`` column to ``media`` when it is not listed yet.
    3. Fill ``hostname`` from ``path`` for every row where it is still NULL.
    4. Create the ``media_hostname_idx`` index if it does not exist.

    Args:
        args: object exposing the database handle as ``args.db``.

    Returns:
        The column collection obtained from ``db_utils.columns(args, "media")``,
        including ``"hostname"`` when the column was just added. When no
        columns are reported the collection is returned unchanged and no SQL
        is executed against ``media``.
    """
    # Kept as a function-level import, as in the original
    # (presumably to avoid an import cycle -- TODO confirm).
    from library.utils.path_utils import domain_from_url

    m_columns = db_utils.columns(args, "media")

    # Registered unconditionally so later queries can call it in SQL.
    args.db.register_function(domain_from_url, deterministic=True)

    # No columns reported: the ALTER below was already skipped in this case,
    # and the UPDATE / CREATE INDEX would fail, so there is nothing to migrate.
    if not m_columns:
        return m_columns

    if "hostname" not in m_columns:
        args.db.execute("ALTER TABLE media ADD COLUMN hostname TEXT")
        # NOTE(review): assumes db_utils.columns returns a set-like object
        # with .add(); verify -- a dict return would raise AttributeError here.
        m_columns.add("hostname")

    # Only rows still NULL are touched, so repeated calls backfill new rows only.
    with args.db.conn:
        args.db.execute("UPDATE media SET hostname = domain_from_url(path) WHERE hostname IS NULL")
        args.db.execute("CREATE INDEX IF NOT EXISTS media_hostname_idx ON media (hostname)")
    return m_columns
22+
23+
924
def fs_sql(args, limit) -> tuple[str, dict]:
1025
m_columns = db_utils.columns(args, "media")
1126
args.table, m_columns = sql_utils.search_filter(args, m_columns)
@@ -122,7 +137,7 @@ def historical_media(args):
122137

123138

124139
def construct_links_query(args, limit) -> tuple[str, dict]:
125-
m_columns = db_utils.columns(args, "media")
140+
m_columns = add_hostname_column(args)
126141
args.table, m_columns = sql_utils.search_filter(args, m_columns)
127142

128143
if getattr(args, "category", None) and "category" in m_columns:
@@ -167,7 +182,7 @@ def construct_links_query(args, limit) -> tuple[str, dict]:
167182

168183

169184
def construct_tabs_query(args) -> tuple[str, dict]:
170-
m_columns = db_utils.columns(args, "media")
185+
m_columns = add_hostname_column(args)
171186
args.table, m_columns = sql_utils.search_filter(args, m_columns)
172187

173188
query = f"""WITH media_history as (

tests/utils/test_path_utils.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,23 @@ def test_safe_join(user_path, expected):
104104
assert utils.p(result) == utils.p(expected)
105105

106106

107+
@pytest.mark.parametrize(
    ("url", "expected"),
    [
        ("http://example.com/path/to/file.txt", "example.com"),
        ("https://www.example.org/another/file.jpg", "www.example.org"),
        ("ftp://fileserver.net/pub/document.pdf", "fileserver.net"),
        ("http://example.com:8080/path", "example.com"),
        ("https://user:password@example.com/path", "example.com"),
        ("http://[::1]/path", "::1"),
        ("http://127.0.0.1/path", "127.0.0.1"),
        ("https://sub.domain.example.com/path", "sub.domain.example.com"),
        # Scheme and host are matched case-insensitively; the result is lower case.
        ("HTTP://EXAMPLE.COM:8080/Path", "example.com"),
        # No network location: exercises the netloc fallback branch.
        ("/local/path/file.mp4", ""),
    ],
)
def test_domain_from_url(url, expected):
    """domain_from_url returns the bare lower-cased host, without credentials, port or brackets."""
    assert path_utils.domain_from_url(url) == expected
122+
123+
107124
@pytest.mark.parametrize(
108125
("url", "expected_parent_path", "expected_filename"),
109126
[

0 commit comments

Comments
 (0)