diff --git a/services/api-server/requirements/_base.in b/services/api-server/requirements/_base.in index 58825ffc491c..95834089851f 100644 --- a/services/api-server/requirements/_base.in +++ b/services/api-server/requirements/_base.in @@ -17,6 +17,7 @@ --requirement ../../../packages/service-library/requirements/_base.in --requirement ../../../packages/service-library/requirements/_fastapi.in +aiocache aiofiles cryptography fastapi-pagination diff --git a/services/api-server/requirements/_base.txt b/services/api-server/requirements/_base.txt index 2c91b49add5e..01ef368114df 100644 --- a/services/api-server/requirements/_base.txt +++ b/services/api-server/requirements/_base.txt @@ -9,6 +9,7 @@ aiocache==0.12.3 # -r requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/_base.in + # -r requirements/_base.in aiodebug==2.3.0 # via # -r requirements/../../../packages/celery-library/requirements/../../../packages/service-library/requirements/_base.in diff --git a/services/api-server/src/simcore_service_api_server/repository/_base.py b/services/api-server/src/simcore_service_api_server/repository/_base.py index 4a20b37c735c..8853d6d0c2a8 100644 --- a/services/api-server/src/simcore_service_api_server/repository/_base.py +++ b/services/api-server/src/simcore_service_api_server/repository/_base.py @@ -1,7 +1,61 @@ from dataclasses import dataclass +from typing import Final from sqlalchemy.ext.asyncio import AsyncEngine +## Memory Usage of aiocache for Session Authentication Caching +AUTH_SESSION_TTL_SECONDS: Final = 120 # 2 minutes +""" + ### Memory Usage Characteristics + + **aiocache** uses in-memory storage by default, which means: + + 1. **Linear memory growth**: Each cached item consumes RAM proportional to the serialized size of the cached data + 2. **No automatic memory limits**: By default, there's no built-in maximum memory cap + 3. **TTL-based cleanup**: Items are only removed when they expire (TTL) or are explicitly deleted + + + **Key limitations:** + - **MEMORY backend**: No built-in memory limits or LRU eviction + - **Maximum capacity**: Limited only by available system RAM + - **Risk**: Memory leaks if TTL is too long or cache keys grow unbounded + + ### Recommendations for Your Use Case + + **For authentication caching:** + + 1. **Low memory impact**: User authentication data is typically small (user_id, email, product_name) + 2. **Short TTL**: Your 120s TTL helps prevent unbounded growth + 3. **Bounded key space**: API keys are finite, not user-generated + + **Memory estimation:** + ``` + Per cache entry ≈ 200-500 bytes (user data + overhead) + 1000 active users ≈ 500KB + 10000 active users ≈ 5MB + ``` + + ### Alternative Approaches + + **If memory becomes a concern:** + + 1. **Redis backend**: + ```python + cache = Cache(Cache.REDIS, endpoint="redis://localhost", ...) + ``` + + 2. **Custom eviction policy**: Implement LRU manually or use shorter TTL + + 3. **Monitoring**: Track cache size in production: + ```python + # Check cache statistics + cache_stats = await cache.get_stats() + ``` + + **Verdict**: + For authentication use case with reasonable user counts (<10K active), memory impact should be minimal with your current TTL configuration. +""" + @dataclass class BaseRepository: diff --git a/services/api-server/src/simcore_service_api_server/repository/api_keys.py b/services/api-server/src/simcore_service_api_server/repository/api_keys.py index 2ab34849c7ca..c418f9650396 100644 --- a/services/api-server/src/simcore_service_api_server/repository/api_keys.py +++ b/services/api-server/src/simcore_service_api_server/repository/api_keys.py @@ -2,13 +2,14 @@ from typing import NamedTuple import sqlalchemy as sa +from aiocache import cached # type: ignore[import-untyped] from models_library.products import ProductName from pydantic.types import PositiveInt from simcore_postgres_database.models.api_keys import api_keys as auth_api_keys_table from simcore_postgres_database.utils_repos import pass_or_acquire_connection from sqlalchemy.ext.asyncio import AsyncConnection -from ._base import BaseRepository +from ._base import AUTH_SESSION_TTL_SECONDS, BaseRepository _logger = logging.getLogger(__name__) @@ -21,13 +22,24 @@ class UserAndProductTuple(NamedTuple): class ApiKeysRepository(BaseRepository): """Auth access""" + @cached( + ttl=AUTH_SESSION_TTL_SECONDS, + key_builder=lambda *_args, **kwargs: f"api_auth:{kwargs['api_key']}", + namespace=__name__, + noself=True, + ) async def get_user( self, connection: AsyncConnection | None = None, *, api_key: str, - api_secret: str + api_secret: str, ) -> UserAndProductTuple | None: + """Validates API key and secret, returning user info if valid otherwise None. + + WARNING: Cached for 120s TTL - secret validation occurs every 2 minutes. + NOTE: to disable caching set AIOCACHE_DISABLE=1 + """ stmt = sa.select( auth_api_keys_table.c.user_id, diff --git a/services/api-server/src/simcore_service_api_server/repository/users.py b/services/api-server/src/simcore_service_api_server/repository/users.py index f09335ecdef5..57b35dacbc68 100644 --- a/services/api-server/src/simcore_service_api_server/repository/users.py +++ b/services/api-server/src/simcore_service_api_server/repository/users.py @@ -1,4 +1,5 @@ import sqlalchemy as sa +from aiocache import Cache, cached # type: ignore[import-untyped] from common_library.users_enums import UserStatus from models_library.emails import LowerCaseEmailStr from models_library.users import UserID @@ -7,16 +8,34 @@ from simcore_postgres_database.utils_repos import pass_or_acquire_connection from sqlalchemy.ext.asyncio import AsyncConnection -from ._base import BaseRepository +from ._base import AUTH_SESSION_TTL_SECONDS, BaseRepository class UsersRepository(BaseRepository): + @cached( + ttl=AUTH_SESSION_TTL_SECONDS, + key_builder=lambda *_args, **kwargs: f"user_email:{kwargs['user_id']}", + cache=Cache.MEMORY, + namespace=__name__, + noself=True, + ) async def get_active_user_email( self, connection: AsyncConnection | None = None, *, user_id: UserID, ) -> LowerCaseEmailStr | None: + """Retrieves the email address of an active user. + + Arguments: + user_id -- The ID of the user whose email is to be retrieved. + + Returns: + The email address of the user if found, otherwise None. + + WARNING: Cached for 120s TTL - email changes will not be seen for 2 minutes. + NOTE: to disable caching set AIOCACHE_DISABLE=1 + """ async with pass_or_acquire_connection(self.db_engine, connection) as conn: email = await conn.scalar( sa.select(users.c.email).where( diff --git a/services/api-server/tests/unit/_with_db/authentication/conftest.py b/services/api-server/tests/unit/_with_db/authentication/conftest.py new file mode 100644 index 000000000000..44a6af616224 --- /dev/null +++ b/services/api-server/tests/unit/_with_db/authentication/conftest.py @@ -0,0 +1,58 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=too-many-arguments + +from collections.abc import AsyncGenerator, AsyncIterator, Callable + +import pytest +from asgi_lifespan import LifespanManager +from fastapi import FastAPI +from models_library.api_schemas_api_server.api_keys import ApiKeyInDB +from pydantic import PositiveInt +from simcore_service_api_server.clients.postgres import get_engine +from simcore_service_api_server.repository.api_keys import ApiKeysRepository +from simcore_service_api_server.repository.users import UsersRepository +from sqlalchemy.ext.asyncio import AsyncEngine + +MAX_TIME_FOR_APP_TO_STARTUP = 10 +MAX_TIME_FOR_APP_TO_SHUTDOWN = 10 + + +@pytest.fixture +async def app_started(app: FastAPI, is_pdb_enabled: bool) -> AsyncIterator[FastAPI]: + # LifespanManager will trigger app's startup&shutown event handlers + async with LifespanManager( + app, + startup_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_STARTUP, + shutdown_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_SHUTDOWN, + ): + yield app + + +@pytest.fixture +async def async_engine(app_started: FastAPI) -> AsyncEngine: + # Overrides + return get_engine(app_started) + + +@pytest.fixture +def api_key_repo( + async_engine: AsyncEngine, +) -> ApiKeysRepository: + return ApiKeysRepository(db_engine=async_engine) + + +@pytest.fixture +def users_repo( + async_engine: AsyncEngine, +) -> UsersRepository: + return UsersRepository(db_engine=async_engine) + + +@pytest.fixture +async def api_key_in_db( + create_fake_api_keys: Callable[[PositiveInt], AsyncGenerator[ApiKeyInDB, None]], +) -> ApiKeyInDB: + """Creates a single API key in DB for testing purposes""" + return await anext(create_fake_api_keys(1)) diff --git a/services/api-server/tests/unit/_with_db/authentication/test_api_dependency_authentication.py b/services/api-server/tests/unit/_with_db/authentication/test_api_dependency_authentication.py new file mode 100644 index 000000000000..8960d9762cff --- /dev/null +++ b/services/api-server/tests/unit/_with_db/authentication/test_api_dependency_authentication.py @@ -0,0 +1,150 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=too-many-arguments + +import time + +import pytest +from fastapi.security import HTTPBasicCredentials +from models_library.api_schemas_api_server.api_keys import ApiKeyInDB +from pytest_mock import MockerFixture +from simcore_service_api_server.api.dependencies.authentication import ( + get_current_identity, +) +from simcore_service_api_server.repository.api_keys import ApiKeysRepository +from simcore_service_api_server.repository.users import UsersRepository + + +async def test_rest_dependency_authentication( + api_key_in_db: ApiKeyInDB, + api_key_repo: ApiKeysRepository, + users_repo: UsersRepository, +): + + # Generate a fake API key + # Act + result = await get_current_identity( + apikeys_repo=api_key_repo, + users_repo=users_repo, + credentials=HTTPBasicCredentials( + username=api_key_in_db.api_key, password=api_key_in_db.api_secret + ), + ) + + # Assert + assert result is not None + assert result.user_id == api_key_in_db.user_id + assert result.product_name == api_key_in_db.product_name + + +@pytest.mark.skip(reason="This test is intended to be used for local profiling only") +async def test_cache_effectiveness_in_rest_authentication_dependencies( + api_key_in_db: ApiKeyInDB, + api_key_repo: ApiKeysRepository, + users_repo: UsersRepository, + mocker: MockerFixture, +): + """Test that caching reduces database calls and improves performance.""" + + # Generate a fake API key + credentials = HTTPBasicCredentials( + username=api_key_in_db.api_key, password=api_key_in_db.api_secret + ) + + # Get cache instances from repository methods + # pylint: disable=no-member + api_keys_cache = api_key_repo.get_user.cache + users_cache = users_repo.get_active_user_email.cache + + # Clear any existing cache + await api_keys_cache.clear() + await users_cache.clear() + + # Spy on the connection's execute method by patching AsyncConnection.execute + from sqlalchemy.ext.asyncio import AsyncConnection # noqa: PLC0415 + + execute_spy = mocker.spy(AsyncConnection, "execute") + + # Test with cache enabled (default behavior) + # First call - should hit database + start_time = time.time() + result1 = await get_current_identity( + apikeys_repo=api_key_repo, + users_repo=users_repo, + credentials=credentials, + ) + first_call_time = time.time() - start_time + + # Second call - should use cache + start_time = time.time() + result2 = await get_current_identity( + apikeys_repo=api_key_repo, + users_repo=users_repo, + credentials=credentials, + ) + second_call_time = time.time() - start_time + + cached_db_calls = execute_spy.call_count + execute_spy.reset_mock() + + # Verify cache was used + api_key_cache_key = f"api_auth:{api_key_in_db.api_key}" + user_cache_key = f"user_email:{api_key_in_db.user_id}" + + assert await api_keys_cache.exists(api_key_cache_key), "API key should be cached" + assert await users_cache.exists(user_cache_key), "User email should be cached" + + # Test with cache disabled by clearing cache before each call + await api_keys_cache.clear() + await users_cache.clear() + + # First call without cache + start_time = time.time() + result3 = await get_current_identity( + apikeys_repo=api_key_repo, + users_repo=users_repo, + credentials=credentials, + ) + no_cache_first_time = time.time() - start_time + + # Clear cache again to simulate no caching + await api_keys_cache.clear() + await users_cache.clear() + + # Second call without cache + start_time = time.time() + result4 = await get_current_identity( + apikeys_repo=api_key_repo, + users_repo=users_repo, + credentials=credentials, + ) + no_cache_second_time = time.time() - start_time + + no_cache_db_calls = execute_spy.call_count + + # ASSERTIONS + # All results should be identical + assert result1.user_id == result2.user_id == result3.user_id == result4.user_id + assert ( + result1.product_name + == result2.product_name + == result3.product_name + == result4.product_name + ) + assert result1.email == result2.email == result3.email == result4.email + + # With cache: second call should be significantly faster + assert ( + second_call_time < first_call_time * 0.5 + ), "Cache should make subsequent calls faster" + + # Without cache: both calls should take similar time + time_ratio = abs(no_cache_second_time - no_cache_first_time) / max( + no_cache_first_time, no_cache_second_time + ) + assert time_ratio < 0.5, "Without cache, call times should be similar" + + # With cache: fewer total database calls (due to caching) + # Without cache: more database calls (no caching) + assert no_cache_db_calls > cached_db_calls, "Cache should reduce database calls" diff --git a/services/api-server/tests/unit/_with_db/authentication/test_repository_api_keys.py b/services/api-server/tests/unit/_with_db/authentication/test_repository_api_keys.py new file mode 100644 index 000000000000..916c629375ef --- /dev/null +++ b/services/api-server/tests/unit/_with_db/authentication/test_repository_api_keys.py @@ -0,0 +1,39 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument +# pylint: disable=unused-variable +# pylint: disable=too-many-arguments + + +from models_library.api_schemas_api_server.api_keys import ApiKeyInDB +from simcore_service_api_server.repository.api_keys import ApiKeysRepository + + +async def test_get_user_with_valid_credentials( + api_key_in_db: ApiKeyInDB, + api_key_repo: ApiKeysRepository, +): + # Act + result = await api_key_repo.get_user( + api_key=api_key_in_db.api_key, api_secret=api_key_in_db.api_secret + ) + + # Assert + assert result is not None + assert result.user_id == api_key_in_db.user_id + assert result.product_name == api_key_in_db.product_name + + +async def test_get_user_with_invalid_credentials( + api_key_in_db: ApiKeyInDB, + api_key_repo: ApiKeysRepository, +): + + # Generate a fake API key + + # Act - use wrong secret + result = await api_key_repo.get_user( + api_key=api_key_in_db.api_key, api_secret="wrong_secret" + ) + + # Assert + assert result is None diff --git a/services/api-server/tests/unit/_with_db/conftest.py b/services/api-server/tests/unit/_with_db/conftest.py index 90fcea5f8091..41b8b759b781 100644 --- a/services/api-server/tests/unit/_with_db/conftest.py +++ b/services/api-server/tests/unit/_with_db/conftest.py @@ -301,7 +301,9 @@ async def _generate_fake_api_key(n: PositiveInt): _generate_fake_api_key.row_ids.append(row.id) - yield ApiKeyInDB.model_validate({"api_secret": plain_api_secret, **row}) + yield ApiKeyInDB.model_validate( + {"api_secret": plain_api_secret, **row._asdict()} + ) _generate_fake_api_key.row_ids = [] yield _generate_fake_api_key