1 change: 1 addition & 0 deletions services/api-server/requirements/_base.in
@@ -17,6 +17,7 @@
--requirement ../../../packages/service-library/requirements/_base.in
--requirement ../../../packages/service-library/requirements/_fastapi.in

aiocache
aiofiles
cryptography
fastapi-pagination
1 change: 1 addition & 0 deletions services/api-server/requirements/_base.txt
@@ -9,6 +9,7 @@ aiocache==0.12.3
# -r requirements/../../../packages/service-library/requirements/_base.in
# -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in
# -r requirements/../../../packages/simcore-sdk/requirements/_base.in
# -r requirements/_base.in
aiodebug==2.3.0
# via
# -r requirements/../../../packages/celery-library/requirements/../../../packages/service-library/requirements/_base.in
services/api-server/src/simcore_service_api_server/repository/_base.py
@@ -1,7 +1,61 @@
from dataclasses import dataclass
from typing import Final

from sqlalchemy.ext.asyncio import AsyncEngine

## Memory Usage of aiocache for Session Authentication Caching
AUTH_SESSION_TTL_SECONDS: Final = 120 # 2 minutes
"""
### Memory Usage Characteristics

**aiocache** uses in-memory storage by default, which means:

1. **Linear memory growth**: Each cached item consumes RAM proportional to the size of the cached data
2. **No automatic memory limits**: By default, there's no built-in maximum memory cap
3. **TTL-based cleanup**: Items are only removed when they expire (TTL) or are explicitly deleted


**Key limitations:**
- **MEMORY backend**: No built-in memory limits or LRU eviction
- **Maximum capacity**: Limited only by available system RAM
- **Risk**: Memory leaks if the TTL is too long or the cache keys grow unbounded (see the sketch below)
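
A minimal sketch of the default MEMORY backend's behavior (plain aiocache API; the 120s TTL mirrors `AUTH_SESSION_TTL_SECONDS`):

```python
import asyncio

from aiocache import Cache


async def main() -> None:
    cache = Cache(Cache.MEMORY)  # SimpleMemoryCache: a plain dict, no size cap
    await cache.set("api_auth:some-key", ("user_id", "product"), ttl=120)
    # The entry stays in RAM until the TTL fires or it is deleted explicitly;
    # nothing is evicted if many distinct keys are written.
    print(await cache.get("api_auth:some-key"))


asyncio.run(main())
```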

### Recommendations for This Use Case

**For authentication caching:**

1. **Low memory impact**: User authentication data is typically small (user_id, email, product_name)
2. **Short TTL**: The 120s TTL helps prevent unbounded growth
3. **Bounded key space**: API keys are finite, not user-generated

**Memory estimation:**
```
Per cache entry ≈ 200-500 bytes (user data + overhead)
1000 active users ≈ 500KB
10000 active users ≈ 5MB
```
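
These figures can be sanity-checked by measuring a representative payload directly; a rough sketch (the sample tuple is a stand-in for the cached user data, not the exact type used here):

```python
import pickle
import sys

entry = (42, "osparc")  # stand-in for a cached (user_id, product_name) entry

print(len(pickle.dumps(entry)), "bytes serialized")  # rough payload size
print(sys.getsizeof(entry), "bytes for the tuple object alone")
# The backing dict slot, key string and TTL bookkeeping add further overhead.
```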

### Alternative Approaches

**If memory becomes a concern:**

1. **Redis backend**:
```python
cache = Cache(Cache.REDIS, endpoint="localhost", port=6379, ...)
```

2. **Custom eviction policy**: Implement LRU manually (see the sketch below) or use a shorter TTL

3. **Monitoring**: Track cache effectiveness in production, e.g. with aiocache's built-in `HitMissRatioPlugin`:
```python
from aiocache import Cache
from aiocache.plugins import HitMissRatioPlugin

cache = Cache(Cache.MEMORY, plugins=[HitMissRatioPlugin()])
# After some traffic the plugin exposes hit/miss counters on the cache:
# cache.hit_miss_ratio -> {"total": ..., "hits": ..., "hit_ratio": ...}
```
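
A manual LRU cap, as referenced in item 2 above, could look like the following sketch (illustrative only, not aiocache API):

```python
from collections import OrderedDict


class BoundedLRU:
    """Tiny size-capped LRU mapping; eviction drops the least recently used."""

    def __init__(self, max_entries: int = 10_000) -> None:
        self._data: OrderedDict[str, object] = OrderedDict()
        self._max = max_entries

    def set(self, key: str, value: object) -> None:
        self._data.pop(key, None)
        self._data[key] = value  # most recently used lives at the end
        if len(self._data) > self._max:
            self._data.popitem(last=False)  # evict the least recently used

    def get(self, key: str) -> object | None:
        if key in self._data:
            self._data.move_to_end(key)  # refresh recency on access
        return self._data.get(key)
```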

**Verdict**:
For the authentication use case with reasonable active-user counts (<10K), the memory impact should be minimal with the current TTL configuration.
"""


@dataclass
class BaseRepository:
services/api-server/src/simcore_service_api_server/repository/api_keys.py
@@ -2,13 +2,14 @@
from typing import NamedTuple

import sqlalchemy as sa
from aiocache import cached # type: ignore[import-untyped]
from models_library.products import ProductName
from pydantic.types import PositiveInt
from simcore_postgres_database.models.api_keys import api_keys as auth_api_keys_table
from simcore_postgres_database.utils_repos import pass_or_acquire_connection
from sqlalchemy.ext.asyncio import AsyncConnection

from ._base import BaseRepository
from ._base import AUTH_SESSION_TTL_SECONDS, BaseRepository

_logger = logging.getLogger(__name__)

@@ -21,13 +22,24 @@ class UserAndProductTuple(NamedTuple):
class ApiKeysRepository(BaseRepository):
"""Auth access"""

@cached(
ttl=AUTH_SESSION_TTL_SECONDS,
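# NOTE: the cache key is built from the api_key only, so the api_secret is
# only re-checked against the database once the cached entry expires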
key_builder=lambda *_args, **kwargs: f"api_auth:{kwargs['api_key']}",
namespace=__name__,
noself=True,
)
async def get_user(
self,
connection: AsyncConnection | None = None,
*,
api_key: str,
api_secret: str
api_secret: str,
) -> UserAndProductTuple | None:
"""Validates API key and secret, returning user info if valid otherwise None.

WARNING: Cached for 120s TTL - secret validation occurs every 2 minutes.
NOTE: to disable caching set AIOCACHE_DISABLE=1
"""

stmt = sa.select(
auth_api_keys_table.c.user_id,
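
aiocache attaches the underlying cache instance to the decorated function (the tests below rely on this via `get_user.cache`), so a revoked key could be evicted eagerly instead of waiting out the 120s TTL. A hypothetical helper sketching this, not part of this change:

```python
from simcore_service_api_server.repository.api_keys import ApiKeysRepository


async def evict_api_key_from_auth_cache(api_key: str) -> None:
    # Deleting the cached entry forces the next get_user() call to hit the
    # database; the key format must match the decorator's key_builder.
    await ApiKeysRepository.get_user.cache.delete(f"api_auth:{api_key}")
```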
services/api-server/src/simcore_service_api_server/repository/users.py
@@ -1,4 +1,5 @@
import sqlalchemy as sa
from aiocache import Cache, cached # type: ignore[import-untyped]
from common_library.users_enums import UserStatus
from models_library.emails import LowerCaseEmailStr
from models_library.users import UserID
@@ -7,16 +8,34 @@
from simcore_postgres_database.utils_repos import pass_or_acquire_connection
from sqlalchemy.ext.asyncio import AsyncConnection

from ._base import BaseRepository
from ._base import AUTH_SESSION_TTL_SECONDS, BaseRepository


class UsersRepository(BaseRepository):
@cached(
ttl=AUTH_SESSION_TTL_SECONDS,
key_builder=lambda *_args, **kwargs: f"user_email:{kwargs['user_id']}",
cache=Cache.MEMORY,
namespace=__name__,
noself=True,
)
async def get_active_user_email(
self,
connection: AsyncConnection | None = None,
*,
user_id: UserID,
) -> LowerCaseEmailStr | None:
"""Retrieves the email address of an active user.

Arguments:
user_id -- The ID of the user whose email is to be retrieved.

Returns:
The email address of the user if found, otherwise None.

WARNING: results are cached with a 120s TTL, so email changes may take up to
2 minutes to become visible.
NOTE: set AIOCACHE_DISABLE=1 to disable caching.
"""
async with pass_or_acquire_connection(self.db_engine, connection) as conn:
email = await conn.scalar(
sa.select(users.c.email).where(
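
Both repository docstrings point at the `AIOCACHE_DISABLE` switch; aiocache reads the variable at call time, so a test can flip it per-test with `monkeypatch`. A hypothetical fixture sketching this, not part of this change:

```python
import pytest


@pytest.fixture
def disable_auth_caching(monkeypatch: pytest.MonkeyPatch) -> None:
    # aiocache bypasses every @cached decorator while this variable is "1"
    monkeypatch.setenv("AIOCACHE_DISABLE", "1")
```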
conftest.py (new file: shared test fixtures)
@@ -0,0 +1,58 @@
# pylint: disable=redefined-outer-name
# pylint: disable=unused-argument
# pylint: disable=unused-variable
# pylint: disable=too-many-arguments

from collections.abc import AsyncGenerator, AsyncIterator, Callable

import pytest
from asgi_lifespan import LifespanManager
from fastapi import FastAPI
from models_library.api_schemas_api_server.api_keys import ApiKeyInDB
from pydantic import PositiveInt
from simcore_service_api_server.clients.postgres import get_engine
from simcore_service_api_server.repository.api_keys import ApiKeysRepository
from simcore_service_api_server.repository.users import UsersRepository
from sqlalchemy.ext.asyncio import AsyncEngine

MAX_TIME_FOR_APP_TO_STARTUP = 10
MAX_TIME_FOR_APP_TO_SHUTDOWN = 10


@pytest.fixture
async def app_started(app: FastAPI, is_pdb_enabled: bool) -> AsyncIterator[FastAPI]:
# LifespanManager will trigger the app's startup & shutdown event handlers
async with LifespanManager(
app,
startup_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_STARTUP,
shutdown_timeout=None if is_pdb_enabled else MAX_TIME_FOR_APP_TO_SHUTDOWN,
):
yield app


@pytest.fixture
async def async_engine(app_started: FastAPI) -> AsyncEngine:
# Overrides the default `async_engine` fixture with the engine from the started app
return get_engine(app_started)


@pytest.fixture
def api_key_repo(
async_engine: AsyncEngine,
) -> ApiKeysRepository:
return ApiKeysRepository(db_engine=async_engine)


@pytest.fixture
def users_repo(
async_engine: AsyncEngine,
) -> UsersRepository:
return UsersRepository(db_engine=async_engine)


@pytest.fixture
async def api_key_in_db(
create_fake_api_keys: Callable[[PositiveInt], AsyncGenerator[ApiKeyInDB, None]],
) -> ApiKeyInDB:
"""Creates a single API key in DB for testing purposes"""
return await anext(create_fake_api_keys(1))
new test file: cached REST authentication tests
@@ -0,0 +1,150 @@
# pylint: disable=redefined-outer-name
# pylint: disable=unused-argument
# pylint: disable=unused-variable
# pylint: disable=too-many-arguments

import time

import pytest
from fastapi.security import HTTPBasicCredentials
from models_library.api_schemas_api_server.api_keys import ApiKeyInDB
from pytest_mock import MockerFixture
from simcore_service_api_server.api.dependencies.authentication import (
get_current_identity,
)
from simcore_service_api_server.repository.api_keys import ApiKeysRepository
from simcore_service_api_server.repository.users import UsersRepository


async def test_rest_dependency_authentication(
api_key_in_db: ApiKeyInDB,
api_key_repo: ApiKeysRepository,
users_repo: UsersRepository,
):

# Act (the fixture already created a fake API key in the DB)
result = await get_current_identity(
apikeys_repo=api_key_repo,
users_repo=users_repo,
credentials=HTTPBasicCredentials(
username=api_key_in_db.api_key, password=api_key_in_db.api_secret
),
)

# Assert
assert result is not None
assert result.user_id == api_key_in_db.user_id
assert result.product_name == api_key_in_db.product_name


@pytest.mark.skip(reason="This test is intended to be used for local profiling only")
async def test_cache_effectiveness_in_rest_authentication_dependencies(
api_key_in_db: ApiKeyInDB,
api_key_repo: ApiKeysRepository,
users_repo: UsersRepository,
mocker: MockerFixture,
):
"""Test that caching reduces database calls and improves performance."""

# Build credentials from the fake API key created by the fixture
credentials = HTTPBasicCredentials(
username=api_key_in_db.api_key, password=api_key_in_db.api_secret
)

# Get cache instances from repository methods
# pylint: disable=no-member
api_keys_cache = api_key_repo.get_user.cache
users_cache = users_repo.get_active_user_email.cache

# Clear any existing cache
await api_keys_cache.clear()
await users_cache.clear()

# Spy on the connection's execute method by patching AsyncConnection.execute
from sqlalchemy.ext.asyncio import AsyncConnection # noqa: PLC0415

execute_spy = mocker.spy(AsyncConnection, "execute")

# Test with cache enabled (default behavior)
# First call - should hit database
start_time = time.time()
result1 = await get_current_identity(
apikeys_repo=api_key_repo,
users_repo=users_repo,
credentials=credentials,
)
first_call_time = time.time() - start_time

# Second call - should use cache
start_time = time.time()
result2 = await get_current_identity(
apikeys_repo=api_key_repo,
users_repo=users_repo,
credentials=credentials,
)
second_call_time = time.time() - start_time

cached_db_calls = execute_spy.call_count
execute_spy.reset_mock()

# Verify cache was used
api_key_cache_key = f"api_auth:{api_key_in_db.api_key}"
user_cache_key = f"user_email:{api_key_in_db.user_id}"

assert await api_keys_cache.exists(api_key_cache_key), "API key should be cached"
assert await users_cache.exists(user_cache_key), "User email should be cached"

# Test with cache disabled by clearing cache before each call
await api_keys_cache.clear()
await users_cache.clear()

# First call without cache
start_time = time.time()
result3 = await get_current_identity(
apikeys_repo=api_key_repo,
users_repo=users_repo,
credentials=credentials,
)
no_cache_first_time = time.time() - start_time

# Clear cache again to simulate no caching
await api_keys_cache.clear()
await users_cache.clear()

# Second call without cache
start_time = time.time()
result4 = await get_current_identity(
apikeys_repo=api_key_repo,
users_repo=users_repo,
credentials=credentials,
)
no_cache_second_time = time.time() - start_time

no_cache_db_calls = execute_spy.call_count

# ASSERTIONS
# All results should be identical
assert result1.user_id == result2.user_id == result3.user_id == result4.user_id
assert (
result1.product_name
== result2.product_name
== result3.product_name
== result4.product_name
)
assert result1.email == result2.email == result3.email == result4.email

# With cache: second call should be significantly faster
assert (
second_call_time < first_call_time * 0.5
), "Cache should make subsequent calls faster"

# Without cache: both calls should take similar time
time_ratio = abs(no_cache_second_time - no_cache_first_time) / max(
no_cache_first_time, no_cache_second_time
)
assert time_ratio < 0.5, "Without cache, call times should be similar"

# With cache: fewer total database calls (due to caching)
# Without cache: more database calls (no caching)
assert no_cache_db_calls > cached_db_calls, "Cache should reduce database calls"