From df85c207bfc43df68e3cbd367d09edaba612b2be Mon Sep 17 00:00:00 2001 From: Kori Kuzma Date: Thu, 18 Sep 2025 07:53:36 -0400 Subject: [PATCH 1/4] feat: add sanitized_url property to parse result --- pyproject.toml | 1 - src/cool_seq_tool/resources/status.py | 9 ++++++--- src/cool_seq_tool/sources/uta_database.py | 5 +++++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6342911..5299d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ "ga4gh.vrs >=2.1.3,<3.0", "wags-tails ~= 0.4.0", "bioutils", - "pip", ] dynamic = ["version"] diff --git a/src/cool_seq_tool/resources/status.py b/src/cool_seq_tool/resources/status.py index 556fd0c..e1ca199 100644 --- a/src/cool_seq_tool/resources/status.py +++ b/src/cool_seq_tool/resources/status.py @@ -3,16 +3,16 @@ import logging from collections import namedtuple from pathlib import Path +from urllib.parse import urlparse from agct._core import ChainfileError from asyncpg import InvalidCatalogNameError, UndefinedTableError from biocommons.seqrepo import SeqRepo -from pip._internal.utils.misc import redact_auth_from_url from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess from cool_seq_tool.mappers.liftover import LiftOver from cool_seq_tool.resources.data_files import DataFile, get_data_file -from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase +from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase _logger = logging.getLogger(__name__) @@ -120,9 +120,12 @@ async def check_status( else: status["liftover"] = True - sanitized_url = redact_auth_from_url(UTA_DB_URL) try: + parsed_result = ParseResult(urlparse(db_url)) + sanitized_url = parsed_result.sanitized_url await UtaDatabase.create(db_url) + except ValueError: + _logger.exception("Database URL is not valid") except (OSError, InvalidCatalogNameError, UndefinedTableError): _logger.exception( "Encountered error 
instantiating UTA at URI %s", sanitized_url diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index 35fc873..1bfafb8 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -954,3 +954,8 @@ def schema(self) -> str | None: """Create schema property.""" path_elems = self.path.split("/") return path_elems[2] if len(path_elems) > 2 else None + + @property + def sanitized_url(self) -> str: + """Sanitized DB URL with the password masked""" + return f"{self.scheme}://{self.username}:****@{self.hostname}:{self.port}/{self.database}/{self.schema}" From 195f68912bd706d2a9ca0bbc129b1479374ccba6 Mon Sep 17 00:00:00 2001 From: jarbesfeld Date: Thu, 18 Sep 2025 10:47:44 -0400 Subject: [PATCH 2/4] Use urlunparse --- src/cool_seq_tool/sources/uta_database.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index 1bfafb8..c01c92e 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -5,7 +5,7 @@ from os import environ from typing import Any, Literal, TypeVar from urllib.parse import ParseResult as UrlLibParseResult -from urllib.parse import quote, unquote, urlparse +from urllib.parse import quote, unquote, urlparse, urlunparse import asyncpg import boto3 @@ -958,4 +958,13 @@ def schema(self) -> str | None: @property def sanitized_url(self) -> str: """Sanitized DB URL with the password masked""" - return f"{self.scheme}://{self.username}:****@{self.hostname}:{self.port}/{self.database}/{self.schema}" + return urlunparse( + ( + self.scheme, + self.username, + self.hostname, + self.port, + self.database, + self.schema, + ) + ) From 3b683cc7cacf11214e93397a1eff143a63840cb4 Mon Sep 17 00:00:00 2001 From: Katie Stahl Date: Fri, 19 Sep 2025 12:08:22 -0400 Subject: [PATCH 3/4] fix: variable scope --- 
src/cool_seq_tool/resources/status.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cool_seq_tool/resources/status.py b/src/cool_seq_tool/resources/status.py index e1ca199..95211c9 100644 --- a/src/cool_seq_tool/resources/status.py +++ b/src/cool_seq_tool/resources/status.py @@ -120,9 +120,9 @@ async def check_status( else: status["liftover"] = True + parsed_result = ParseResult(urlparse(db_url)) + sanitized_url = parsed_result.sanitized_url try: - parsed_result = ParseResult(urlparse(db_url)) - sanitized_url = parsed_result.sanitized_url await UtaDatabase.create(db_url) except ValueError: _logger.exception("Database URL is not valid") From 2d4e38db8bb85337d1c2f7f33b8326d319a885c9 Mon Sep 17 00:00:00 2001 From: Katie Stahl Date: Fri, 19 Sep 2025 12:57:41 -0400 Subject: [PATCH 4/4] fix: ensure params sent to urlunparse are strings --- src/cool_seq_tool/sources/uta_database.py | 21 +++-- tests/sources/test_uta_database.py | 98 +++++++++++++++++++++++ 2 files changed, 114 insertions(+), 5 deletions(-) diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index c01c92e..c3c2a0e 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -958,13 +958,24 @@ def schema(self) -> str | None: @property def sanitized_url(self) -> str: """Sanitized DB URL with the password masked""" + netloc = "" + if self.username: + netloc += self.username + if self.password is not None and self.password != "": + netloc += ":***" + netloc += "@" + if self.hostname: + netloc += f"{self.hostname}" + if self.port: + netloc += f":{self.port}" + return urlunparse( ( self.scheme, - self.username, - self.hostname, - self.port, - self.database, - self.schema, + netloc, + self.path, + self.params, + self.query, + self.fragment, ) ) diff --git a/tests/sources/test_uta_database.py b/tests/sources/test_uta_database.py index ab89385..7f96c78 100644 --- 
a/tests/sources/test_uta_database.py +++ b/tests/sources/test_uta_database.py @@ -1,11 +1,14 @@ """Test UTA data source.""" +from urllib.parse import urlparse + import pytest from cool_seq_tool.schemas import Strand from cool_seq_tool.sources.uta_database import ( GenomicTxData, GenomicTxMetadata, + ParseResult, TxExonAlnData, ) @@ -360,3 +363,98 @@ async def test_get_mane_transcripts_from_genomic_pos(test_db): # invalid ac resp = await test_db.get_transcripts_from_genomic_pos("NC_000007.14232", 140753336) assert resp == [] + + +@pytest.mark.parametrize( + ("raw_url", "expected"), + [ + # Username + password + ( + "postgresql://user:pass@localhost:5432/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "pass", + "hostname": "localhost", + "port": 5432, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost:5432/dbname", + }, + ), + # Username with null password + ( + "postgresql://user@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": None, + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user@localhost/dbname", + }, + ), + # Password is "0" + ( + "postgresql://user:0@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "0", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost/dbname", + }, + ), + # Empty password + ( + "postgresql://user:@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user@localhost/dbname", + }, + ), + # No username + ( + "postgresql://localhost:5432/dbname", + { + "scheme": "postgresql", + "username": None, + "password": None, + "hostname": "localhost", + "port": 5432, + "database": "dbname", + "sanitized_url": "postgresql://localhost:5432/dbname", + }, + ), + # With query params + 
( + "postgresql://user:secret@localhost/dbname?query#fragment", + { + "scheme": "postgresql", + "username": "user", + "password": "secret", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost/dbname?query#fragment", + }, + ), + ], +) +async def test_parsed_url(raw_url, expected): + parsed_result = ParseResult(urlparse(raw_url)) + + assert parsed_result.scheme == expected["scheme"] + assert parsed_result.username == expected["username"] + assert parsed_result.password == expected["password"] + assert parsed_result.hostname == expected["hostname"] + assert parsed_result.port == expected["port"] + assert parsed_result.database == expected["database"] + assert parsed_result.sanitized_url == expected["sanitized_url"]