diff --git a/pyproject.toml b/pyproject.toml index 6342911..5299d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,6 @@ dependencies = [ "ga4gh.vrs >=2.1.3,<3.0", "wags-tails ~= 0.4.0", "bioutils", - "pip", ] dynamic = ["version"] diff --git a/src/cool_seq_tool/resources/status.py b/src/cool_seq_tool/resources/status.py index 556fd0c..95211c9 100644 --- a/src/cool_seq_tool/resources/status.py +++ b/src/cool_seq_tool/resources/status.py @@ -3,16 +3,16 @@ import logging from collections import namedtuple from pathlib import Path +from urllib.parse import urlparse from agct._core import ChainfileError from asyncpg import InvalidCatalogNameError, UndefinedTableError from biocommons.seqrepo import SeqRepo -from pip._internal.utils.misc import redact_auth_from_url from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess from cool_seq_tool.mappers.liftover import LiftOver from cool_seq_tool.resources.data_files import DataFile, get_data_file -from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase +from cool_seq_tool.sources.uta_database import UTA_DB_URL, ParseResult, UtaDatabase _logger = logging.getLogger(__name__) @@ -120,9 +120,12 @@ async def check_status( else: status["liftover"] = True - sanitized_url = redact_auth_from_url(UTA_DB_URL) + parsed_result = ParseResult(urlparse(db_url)) + sanitized_url = parsed_result.sanitized_url try: await UtaDatabase.create(db_url) + except ValueError: + _logger.exception("Database URL is not valid") except (OSError, InvalidCatalogNameError, UndefinedTableError): _logger.exception( "Encountered error instantiating UTA at URI %s", sanitized_url diff --git a/src/cool_seq_tool/sources/uta_database.py b/src/cool_seq_tool/sources/uta_database.py index 35fc873..c3c2a0e 100644 --- a/src/cool_seq_tool/sources/uta_database.py +++ b/src/cool_seq_tool/sources/uta_database.py @@ -5,7 +5,7 @@ from os import environ from typing import Any, Literal, TypeVar from urllib.parse import ParseResult as UrlLibParseResult -from urllib.parse import quote, unquote, urlparse +from urllib.parse import quote, unquote, urlparse, urlunparse import asyncpg import boto3 @@ -954,3 +954,28 @@ def schema(self) -> str | None: """Create schema property.""" path_elems = self.path.split("/") return path_elems[2] if len(path_elems) > 2 else None + + @property + def sanitized_url(self) -> str: + """Sanitized DB URL with the password masked""" + netloc = "" + if self.username: + netloc += self.username + if self.password is not None and self.password != "": + netloc += ":***" + netloc += "@" + if self.hostname: + netloc += f"{self.hostname}" + if self.port: + netloc += f":{self.port}" + + return urlunparse( + ( + self.scheme, + netloc, + self.path, + self.params, + self.query, + self.fragment, + ) + ) diff --git a/tests/sources/test_uta_database.py b/tests/sources/test_uta_database.py index ab89385..7f96c78 100644 --- a/tests/sources/test_uta_database.py +++ b/tests/sources/test_uta_database.py @@ -1,11 +1,14 @@ """Test UTA data source.""" +from urllib.parse import urlparse + import pytest from cool_seq_tool.schemas import Strand from cool_seq_tool.sources.uta_database import ( GenomicTxData, GenomicTxMetadata, + ParseResult, TxExonAlnData, ) @@ -360,3 +363,98 @@ async def test_get_mane_transcripts_from_genomic_pos(test_db): # invalid ac resp = await test_db.get_transcripts_from_genomic_pos("NC_000007.14232", 140753336) assert resp == [] + + +@pytest.mark.parametrize( + ("raw_url", "expected"), + [ + # Username + password + ( + "postgresql://user:pass@localhost:5432/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "pass", + "hostname": "localhost", + "port": 5432, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost:5432/dbname", + }, + ), + # Username with null password + ( + "postgresql://user@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": None, + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user@localhost/dbname", + }, + ), + # Password is "0" + ( + "postgresql://user:0@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "0", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost/dbname", + }, + ), + # Empty password + ( + "postgresql://user:@localhost/dbname", + { + "scheme": "postgresql", + "username": "user", + "password": "", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user@localhost/dbname", + }, + ), + # No username + ( + "postgresql://localhost:5432/dbname", + { + "scheme": "postgresql", + "username": None, + "password": None, + "hostname": "localhost", + "port": 5432, + "database": "dbname", + "sanitized_url": "postgresql://localhost:5432/dbname", + }, + ), + # With query params + ( + "postgresql://user:secret@localhost/dbname?query#fragment", + { + "scheme": "postgresql", + "username": "user", + "password": "secret", + "hostname": "localhost", + "port": None, + "database": "dbname", + "sanitized_url": "postgresql://user:***@localhost/dbname?query#fragment", + }, + ), + ], +) +async def test_parsed_url(raw_url, expected): + parsed_result = ParseResult(urlparse(raw_url)) + + assert parsed_result.scheme == expected["scheme"] + assert parsed_result.username == expected["username"] + assert parsed_result.password == expected["password"] + assert parsed_result.hostname == expected["hostname"] + assert parsed_result.port == expected["port"] + assert parsed_result.database == expected["database"] + assert parsed_result.sanitized_url == expected["sanitized_url"]