From 288adc41d1bca6efe3122477da7c26e8a8d714ed Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:12:18 -0500 Subject: [PATCH 1/8] Modernize file storage with UUID-based naming, compression, and tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Refactor file storage to use UUID-based filenames instead of timestamps - Add proper get() function that finds vCons by UUID - Add delete(), exists(), and list_vcons() functions - Add optional gzip compression support - Add date-based directory organization (YYYY/MM/DD) - Add configurable file size limits and permissions - Add Docker volume (vcon_files) for persistent file storage - Add comprehensive test suite with 30+ tests - Update documentation with configuration options and examples 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- example_config.yml | 9 + example_docker-compose.yml | 3 + server/storage/file/README.md | 154 +++++-- server/storage/file/__init__.py | 311 ++++++++++++-- server/storage/file/test_file_storage.py | 498 +++++++++++++++++++++++ 5 files changed, 896 insertions(+), 79 deletions(-) create mode 100644 server/storage/file/test_file_storage.py diff --git a/example_config.yml b/example_config.yml index a12083b..279effd 100644 --- a/example_config.yml +++ b/example_config.yml @@ -105,6 +105,15 @@ tracers: # dlq_vcon_on_error: True storages: + file: + module: storage.file + options: + path: /data/vcons + organize_by_date: true + compression: false + max_file_size: 10485760 + file_permissions: 0644 + dir_permissions: 0755 mongo: module: storage.mongo options: diff --git a/example_docker-compose.yml b/example_docker-compose.yml index a7bd4a4..d28d492 100644 --- a/example_docker-compose.yml +++ b/example_docker-compose.yml @@ -11,6 +11,7 @@ services: command: "watchmedo auto-restart -p '*.py' -R python -- ./server/main.py" volumes: - .:/app + - vcon_files:/data/vcons depends_on: - redis env_file: @@ -30,6 +31,7 @@ services: command: /bin/bash -c "poetry run uvicorn server.api:app --host 0.0.0.0 --port 8000" volumes: - .:/app + - vcon_files:/data/vcons ports: - "${CONSERVER_EXTERNAL_PORT:-8000}:8000" depends_on: @@ -249,3 +251,4 @@ services: volumes: es_data: {} redis_data: {} + vcon_files: {} diff --git a/server/storage/file/README.md b/server/storage/file/README.md index 2d2cd1b..f1b9153 100644 --- a/server/storage/file/README.md +++ b/server/storage/file/README.md @@ -4,75 +4,151 @@ This module implements local file system storage for the vCon server. ## Overview -File storage provides simple, local file system storage capabilities, making it ideal for development, testing, and small-scale deployments of vCon data. +File storage provides local file system storage capabilities, making it ideal for development, testing, and small-scale deployments. Files are stored using the vCon UUID as the filename, with optional date-based directory organization and gzip compression. ## Configuration -Required configuration options: +Configuration options in `config.yml`: ```yaml storages: file: module: storage.file options: - base_path: /path/to/storage # Base directory for file storage - file_format: json # File format (json/txt) - compression: false # Enable compression + path: /data/vcons # Base directory for storage + organize_by_date: true # Store in YYYY/MM/DD subdirectories + compression: false # Enable gzip compression max_file_size: 10485760 # Max file size in bytes (10MB) - file_permissions: 0644 # File permissions + file_permissions: 0644 # Unix file permissions (octal) + dir_permissions: 0755 # Unix directory permissions (octal) ``` +### Configuration Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `path` | string | `/data/vcons` | Base directory for vCon file storage | +| `organize_by_date` | boolean | `true` | Organize files in YYYY/MM/DD subdirectories based on vCon creation date | +| `compression` | boolean | `false` | Enable gzip compression (files saved as `.json.gz`) | +| `max_file_size` | integer | `10485760` | Maximum file size in bytes (10MB default) | +| `file_permissions` | integer | `0644` | Unix permissions for created files | +| `dir_permissions` | integer | `0755` | Unix permissions for created directories | + ## Features -- Local file storage -- Multiple file formats -- Compression support -- File size limits -- Automatic metrics logging -- File organization -- Permission management +- **UUID-based filenames**: Files are named `{uuid}.json` or `{uuid}.json.gz` +- **Date-based organization**: Optional YYYY/MM/DD directory structure based on vCon creation date +- **Gzip compression**: Reduce storage space with optional compression +- **File size limits**: Prevent oversized files from consuming disk space +- **Permission management**: Configure Unix file and directory permissions +- **Automatic cleanup**: Empty directories are removed when vCons are deleted +- **Metrics logging**: All operations are automatically timed and logged + +## Docker Volume Configuration + +When using Docker, mount a volume for persistent file storage: + +```yaml +services: + conserver: + volumes: + - vcon_files:/data/vcons + +volumes: + vcon_files: {} +``` ## Usage ```python from storage import Storage -# Initialize File storage +# Initialize file storage file_storage = Storage("file") -# Save vCon data -file_storage.save(vcon_id) +# Save vCon data (retrieves from Redis and writes to file) +file_storage.save(vcon_uuid) # Retrieve vCon data -vcon_data = file_storage.get(vcon_id) +vcon_data = file_storage.get(vcon_uuid) + +# Delete vCon file +file_storage.delete(vcon_uuid) +``` + +### Direct Module Usage + +For more control, you can use the module functions directly: + +```python +from server.storage.file import save, get, delete, exists, list_vcons + +# Check if vCon exists +if exists("my-uuid", opts): + data = get("my-uuid", opts) + +# List all vCons with pagination +uuids = list_vcons(opts, limit=100, offset=0) + +# Delete a vCon +deleted = delete("my-uuid", opts) +``` + +## File Organization + +### Flat Structure (`organize_by_date: false`) +``` +/data/vcons/ +├── abc123.json +├── def456.json +└── ghi789.json.gz ``` -## Implementation Details +### Date-Based Structure (`organize_by_date: true`) +``` +/data/vcons/ +├── 2024/ +│ ├── 03/ +│ │ ├── 14/ +│ │ │ ├── abc123.json +│ │ │ └── def456.json +│ │ └── 15/ +│ │ └── ghi789.json +│ └── 04/ +│ └── 01/ +│ └── jkl012.json +``` + +## API Reference + +### `save(vcon_uuid: str, opts: dict = None) -> None` +Save a vCon to file storage. Retrieves the vCon from Redis and writes it to a file. + +### `get(vcon_uuid: str, opts: dict = None) -> Optional[dict]` +Retrieve a vCon from file storage by UUID. Returns `None` if not found. + +### `delete(vcon_uuid: str, opts: dict = None) -> bool` +Delete a vCon file. Returns `True` if deleted, `False` if not found. + +### `exists(vcon_uuid: str, opts: dict = None) -> bool` +Check if a vCon exists in file storage. -The File storage implementation: -- Uses standard file system operations -- Implements file compression -- Supports multiple file formats -- Provides file organization -- Includes automatic metrics logging +### `list_vcons(opts: dict = None, limit: int = 100, offset: int = 0) -> list[str]` +List vCon UUIDs in storage with pagination support. Returns UUIDs sorted by modification time (newest first). ## Dependencies -- json -- gzip -- pathlib +- `json` - JSON serialization +- `gzip` - Compression support +- `pathlib` - Path manipulation +- `glob` - File pattern matching ## Best Practices -1. Regular file cleanup -2. Implement file rotation -3. Use appropriate file formats -4. Monitor disk space -5. Implement proper error handling -6. Use compression for large files -7. Regular backup -8. Implement file size limits -9. Use appropriate file permissions -10. Monitor file system performance -11. Implement proper directory structure -12. Handle file locking \ No newline at end of file +1. **Use compression** for large vCons to save disk space +2. **Enable date organization** for easier manual browsing and archival +3. **Set appropriate permissions** for security (default 0644 for files) +4. **Monitor disk space** - implement cleanup policies for old files +5. **Configure volume mounts** in Docker for data persistence +6. **Set reasonable file size limits** to prevent runaway storage +7. **Use S3 or other cloud storage** for production deployments with large volumes \ No newline at end of file diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index 7bd65e4..06ee4b2 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -1,6 +1,20 @@ +""" +File Storage Module + +Provides local file system storage for vCon data with support for: +- UUID-based file organization +- Optional compression (gzip) +- Date-based directory structure +- Configurable file permissions +- File size limits +""" + import os import json +import gzip +import shutil from glob import glob +from pathlib import Path from typing import Optional from lib.logging_utils import init_logger from server.lib.vcon_redis import VconRedis @@ -9,58 +23,275 @@ logger = init_logger(__name__) default_options = { - "path": ".", - "add_timestamp_to_filename": True, - "filename": "vcon", - "extension": "json", + "path": "/data/vcons", + "organize_by_date": True, + "compression": False, + "max_file_size": 10485760, # 10MB + "file_permissions": 0o644, + "dir_permissions": 0o755, } -def save( - vcon_uuid, - opts=default_options, -): - logger.info("Saving vCon to file storage") +def _get_file_path(vcon_uuid: str, opts: dict, created_at: Optional[str] = None) -> Path: + """ + Generate the file path for a vCon. + + If organize_by_date is True, files are stored in YYYY/MM/DD subdirectories. + """ + base_path = Path(opts.get("path", default_options["path"])) + extension = "json.gz" if opts.get("compression", False) else "json" + + if opts.get("organize_by_date", True) and created_at: + try: + dt = datetime.fromisoformat(created_at.replace("Z", "+00:00")) + date_path = dt.strftime("%Y/%m/%d") + return base_path / date_path / f"{vcon_uuid}.{extension}" + except (ValueError, AttributeError): + pass + + return base_path / f"{vcon_uuid}.{extension}" + + +def _ensure_directory(file_path: Path, dir_permissions: int) -> None: + """Ensure the parent directory exists with proper permissions.""" + file_path.parent.mkdir(parents=True, exist_ok=True) + try: + os.chmod(file_path.parent, dir_permissions) + except OSError: + pass # May fail on some systems, not critical + + +def _find_vcon_file(vcon_uuid: str, opts: dict) -> Optional[Path]: + """ + Find an existing vCon file by UUID. + + Searches both flat and date-organized directory structures. + """ + base_path = Path(opts.get("path", default_options["path"])) + compression = opts.get("compression", False) + + # Try both compressed and uncompressed extensions + extensions = ["json.gz", "json"] if compression else ["json", "json.gz"] + + for ext in extensions: + # Check flat structure first + flat_path = base_path / f"{vcon_uuid}.{ext}" + if flat_path.exists(): + return flat_path + + # Search date-organized directories + pattern = str(base_path / "**" / f"{vcon_uuid}.{ext}") + matches = glob(pattern, recursive=True) + if matches: + return Path(matches[0]) + + return None + + +def save(vcon_uuid: str, opts: dict = None) -> None: + """ + Save a vCon to file storage. + + Args: + vcon_uuid: The UUID of the vCon to save + opts: Storage options including: + - path: Base directory for storage + - organize_by_date: Whether to use YYYY/MM/DD subdirectories + - compression: Whether to gzip compress the file + - max_file_size: Maximum file size in bytes + - file_permissions: Unix file permissions + - dir_permissions: Unix directory permissions + """ + if opts is None: + opts = default_options + + logger.info("Saving vCon to file storage: %s", vcon_uuid) + try: vcon_redis = VconRedis() vcon = vcon_redis.get_vcon(vcon_uuid) - if opts["add_timestamp_to_filename"]: - filename = ( - f"{opts['filename']}-{datetime.now().isoformat()}.{opts['extension']}" + vcon_data = vcon.dumps() + + # Check file size limit + max_size = opts.get("max_file_size", default_options["max_file_size"]) + if len(vcon_data.encode("utf-8")) > max_size: + raise ValueError( + f"vCon data exceeds max file size: {len(vcon_data)} > {max_size}" ) + + # Get the file path + created_at = getattr(vcon, "created_at", None) + file_path = _get_file_path(vcon_uuid, opts, created_at) + + # Ensure directory exists + dir_permissions = opts.get("dir_permissions", default_options["dir_permissions"]) + _ensure_directory(file_path, dir_permissions) + + # Write the file + compression = opts.get("compression", default_options["compression"]) + file_permissions = opts.get("file_permissions", default_options["file_permissions"]) + + if compression: + with gzip.open(file_path, "wt", encoding="utf-8") as f: + f.write(vcon_data) else: - filename = f"{opts['filename']}.{opts['extension']}" + with open(file_path, "w", encoding="utf-8") as f: + f.write(vcon_data) + + # Set file permissions + try: + os.chmod(file_path, file_permissions) + except OSError: + pass # May fail on some systems, not critical + + logger.info("file storage: saved vCon %s to %s", vcon_uuid, file_path) - with open(f"{opts['path']}/{filename}", "w") as f: - f.write(vcon.dumps()) - logger.info(f"file storage plugin: inserted vCon: {vcon_uuid}") except Exception as e: - logger.error( - f"file storage plugin: failed to insert vCon: {vcon_uuid}, error: {e} " - ) - raise e + logger.error("file storage: failed to save vCon %s: %s", vcon_uuid, e) + raise + + +def get(vcon_uuid: str, opts: dict = None) -> Optional[dict]: + """ + Get a vCon from file storage by UUID. + + Args: + vcon_uuid: The UUID of the vCon to retrieve + opts: Storage options + + Returns: + The vCon data as a dictionary, or None if not found + """ + if opts is None: + opts = default_options -def get(vcon_uuid: str, opts=default_options) -> Optional[dict]: - """Get a vCon from file storage by UUID.""" try: - # Since files are saved with timestamps, we need to find the latest file - base_path = opts['path'] - base_name = opts['filename'] - ext = opts['extension'] - - # Look for files matching the pattern - pattern = f"{base_path}/{base_name}*.{ext}" - matching_files = glob(pattern) - - if not matching_files: + file_path = _find_vcon_file(vcon_uuid, opts) + + if file_path is None: + logger.debug("file storage: vCon not found: %s", vcon_uuid) return None - - # Get the most recent file - latest_file = max(matching_files, key=os.path.getctime) - - with open(latest_file, 'r') as f: - return json.loads(f.read()) - + + # Read the file (handle both compressed and uncompressed) + if file_path.suffix == ".gz" or str(file_path).endswith(".json.gz"): + with gzip.open(file_path, "rt", encoding="utf-8") as f: + data = json.load(f) + else: + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + + logger.info("file storage: retrieved vCon %s from %s", vcon_uuid, file_path) + return data + except Exception as e: - logger.error(f"file storage plugin: failed to get vCon: {vcon_uuid}, error: {e}") + logger.error("file storage: failed to get vCon %s: %s", vcon_uuid, e) return None + + +def delete(vcon_uuid: str, opts: dict = None) -> bool: + """ + Delete a vCon from file storage. + + Args: + vcon_uuid: The UUID of the vCon to delete + opts: Storage options + + Returns: + True if the file was deleted, False otherwise + """ + if opts is None: + opts = default_options + + try: + file_path = _find_vcon_file(vcon_uuid, opts) + + if file_path is None: + logger.debug("file storage: vCon not found for deletion: %s", vcon_uuid) + return False + + file_path.unlink() + logger.info("file storage: deleted vCon %s from %s", vcon_uuid, file_path) + + # Clean up empty parent directories + _cleanup_empty_dirs(file_path.parent, Path(opts.get("path", default_options["path"]))) + + return True + + except Exception as e: + logger.error("file storage: failed to delete vCon %s: %s", vcon_uuid, e) + return False + + +def _cleanup_empty_dirs(dir_path: Path, base_path: Path) -> None: + """Remove empty directories up to the base path.""" + try: + while dir_path != base_path and dir_path.is_dir(): + if any(dir_path.iterdir()): + break # Directory not empty + dir_path.rmdir() + dir_path = dir_path.parent + except OSError: + pass # Ignore errors during cleanup + + +def exists(vcon_uuid: str, opts: dict = None) -> bool: + """ + Check if a vCon exists in file storage. + + Args: + vcon_uuid: The UUID of the vCon to check + opts: Storage options + + Returns: + True if the vCon exists, False otherwise + """ + if opts is None: + opts = default_options + + return _find_vcon_file(vcon_uuid, opts) is not None + + +def list_vcons(opts: dict = None, limit: int = 100, offset: int = 0) -> list[str]: + """ + List vCon UUIDs in storage. + + Args: + opts: Storage options + limit: Maximum number of UUIDs to return + offset: Number of UUIDs to skip + + Returns: + List of vCon UUIDs + """ + if opts is None: + opts = default_options + + base_path = Path(opts.get("path", default_options["path"])) + + try: + # Find all vCon files + pattern = str(base_path / "**" / "*.json*") + all_files = glob(pattern, recursive=True) + + # Extract UUIDs from filenames + uuids = [] + for file_path in all_files: + filename = Path(file_path).name + # Remove extensions (.json or .json.gz) + uuid = filename.replace(".json.gz", "").replace(".json", "") + uuids.append(uuid) + + # Sort by modification time (newest first) + uuids_with_mtime = [ + (uuid, os.path.getmtime(f)) + for uuid, f in zip(uuids, all_files) + ] + uuids_with_mtime.sort(key=lambda x: x[1], reverse=True) + + # Apply pagination + paginated = uuids_with_mtime[offset:offset + limit] + return [uuid for uuid, _ in paginated] + + except Exception as e: + logger.error("file storage: failed to list vCons: %s", e) + return [] diff --git a/server/storage/file/test_file_storage.py b/server/storage/file/test_file_storage.py new file mode 100644 index 0000000..ddc1315 --- /dev/null +++ b/server/storage/file/test_file_storage.py @@ -0,0 +1,498 @@ +""" +Tests for the file storage module. + +Tests cover: +- Basic CRUD operations (save, get, delete) +- Compression support +- Date-based organization +- File size limits +- Edge cases and error handling +""" + +import pytest +import json +import gzip +import os +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock +from datetime import datetime + +from server.storage.file import ( + save, + get, + delete, + exists, + list_vcons, + default_options, + _get_file_path, + _find_vcon_file, + _cleanup_empty_dirs, +) +from server.vcon import Vcon + + +@pytest.fixture +def temp_storage_dir(): + """Create a temporary directory for file storage tests.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield tmpdir + + +@pytest.fixture +def sample_vcon(): + """Create a sample vCon for testing.""" + vcon = Vcon.build_new() + vcon.add_party({"name": "Test User", "role": "agent"}) + vcon.add_dialog({"type": "text", "body": "Hello, world!"}) + return vcon + + +@pytest.fixture +def mock_vcon_redis(sample_vcon): + """Mock VconRedis to return sample vCon.""" + with patch("server.storage.file.VconRedis") as MockVconRedis: + mock_redis = MagicMock() + mock_redis.get_vcon.return_value = sample_vcon + MockVconRedis.return_value = mock_redis + yield MockVconRedis + + +class TestGetFilePath: + """Tests for _get_file_path helper function.""" + + def test_flat_structure_no_date(self, temp_storage_dir): + """Test file path without date organization.""" + opts = {"path": temp_storage_dir, "organize_by_date": False, "compression": False} + path = _get_file_path("test-uuid", opts) + assert path == Path(temp_storage_dir) / "test-uuid.json" + + def test_flat_structure_with_compression(self, temp_storage_dir): + """Test file path with compression enabled.""" + opts = {"path": temp_storage_dir, "organize_by_date": False, "compression": True} + path = _get_file_path("test-uuid", opts) + assert path == Path(temp_storage_dir) / "test-uuid.json.gz" + + def test_date_organized_structure(self, temp_storage_dir): + """Test file path with date organization.""" + opts = {"path": temp_storage_dir, "organize_by_date": True, "compression": False} + created_at = "2024-03-15T10:30:00+00:00" + path = _get_file_path("test-uuid", opts, created_at) + assert path == Path(temp_storage_dir) / "2024/03/15" / "test-uuid.json" + + def test_date_organized_with_z_suffix(self, temp_storage_dir): + """Test file path with Z suffix in timestamp.""" + opts = {"path": temp_storage_dir, "organize_by_date": True, "compression": False} + created_at = "2024-03-15T10:30:00Z" + path = _get_file_path("test-uuid", opts, created_at) + assert path == Path(temp_storage_dir) / "2024/03/15" / "test-uuid.json" + + def test_invalid_date_falls_back_to_flat(self, temp_storage_dir): + """Test that invalid date falls back to flat structure.""" + opts = {"path": temp_storage_dir, "organize_by_date": True, "compression": False} + path = _get_file_path("test-uuid", opts, "invalid-date") + assert path == Path(temp_storage_dir) / "test-uuid.json" + + +class TestSave: + """Tests for save function.""" + + def test_save_basic(self, temp_storage_dir, mock_vcon_redis, sample_vcon): + """Test basic save operation.""" + opts = { + "path": temp_storage_dir, + "organize_by_date": False, + "compression": False, + "max_file_size": 10485760, + "file_permissions": 0o644, + "dir_permissions": 0o755, + } + + save(sample_vcon.uuid, opts) + + expected_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + assert expected_path.exists() + + with open(expected_path) as f: + saved_data = json.load(f) + assert saved_data["uuid"] == sample_vcon.uuid + + def test_save_with_compression(self, temp_storage_dir, mock_vcon_redis, sample_vcon): + """Test save with gzip compression.""" + opts = { + "path": temp_storage_dir, + "organize_by_date": False, + "compression": True, + "max_file_size": 10485760, + "file_permissions": 0o644, + "dir_permissions": 0o755, + } + + save(sample_vcon.uuid, opts) + + expected_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json.gz" + assert expected_path.exists() + + with gzip.open(expected_path, "rt") as f: + saved_data = json.load(f) + assert saved_data["uuid"] == sample_vcon.uuid + + def test_save_with_date_organization(self, temp_storage_dir, mock_vcon_redis, sample_vcon): + """Test save with date-based directory structure.""" + opts = { + "path": temp_storage_dir, + "organize_by_date": True, + "compression": False, + "max_file_size": 10485760, + "file_permissions": 0o644, + "dir_permissions": 0o755, + } + + save(sample_vcon.uuid, opts) + + # Should create date-based directory structure + base_path = Path(temp_storage_dir) + json_files = list(base_path.rglob("*.json")) + assert len(json_files) == 1 + assert sample_vcon.uuid in str(json_files[0]) + + def test_save_exceeds_max_size(self, temp_storage_dir, mock_vcon_redis, sample_vcon): + """Test that save fails when file exceeds max size.""" + opts = { + "path": temp_storage_dir, + "organize_by_date": False, + "compression": False, + "max_file_size": 10, # Very small limit + "file_permissions": 0o644, + "dir_permissions": 0o755, + } + + with pytest.raises(ValueError, match="exceeds max file size"): + save(sample_vcon.uuid, opts) + + def test_save_creates_directories(self, temp_storage_dir, mock_vcon_redis, sample_vcon): + """Test that save creates necessary directories.""" + nested_path = os.path.join(temp_storage_dir, "nested", "deep", "path") + opts = { + "path": nested_path, + "organize_by_date": False, + "compression": False, + "max_file_size": 10485760, + "file_permissions": 0o644, + "dir_permissions": 0o755, + } + + save(sample_vcon.uuid, opts) + + expected_path = Path(nested_path) / f"{sample_vcon.uuid}.json" + assert expected_path.exists() + + +class TestGet: + """Tests for get function.""" + + def test_get_basic(self, temp_storage_dir, sample_vcon): + """Test basic get operation.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create a test file + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + with open(file_path, "w") as f: + f.write(sample_vcon.dumps()) + + result = get(sample_vcon.uuid, opts) + assert result is not None + assert result["uuid"] == sample_vcon.uuid + + def test_get_compressed(self, temp_storage_dir, sample_vcon): + """Test get with compressed file.""" + opts = {"path": temp_storage_dir, "compression": True} + + # Create a compressed test file + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json.gz" + with gzip.open(file_path, "wt") as f: + f.write(sample_vcon.dumps()) + + result = get(sample_vcon.uuid, opts) + assert result is not None + assert result["uuid"] == sample_vcon.uuid + + def test_get_from_date_directory(self, temp_storage_dir, sample_vcon): + """Test get from date-organized directory.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create a date-organized test file + date_path = Path(temp_storage_dir) / "2024" / "03" / "15" + date_path.mkdir(parents=True) + file_path = date_path / f"{sample_vcon.uuid}.json" + with open(file_path, "w") as f: + f.write(sample_vcon.dumps()) + + result = get(sample_vcon.uuid, opts) + assert result is not None + assert result["uuid"] == sample_vcon.uuid + + def test_get_not_found(self, temp_storage_dir): + """Test get returns None for non-existent file.""" + opts = {"path": temp_storage_dir, "compression": False} + result = get("nonexistent-uuid", opts) + assert result is None + + def test_get_prefers_uncompressed_when_both_exist(self, temp_storage_dir, sample_vcon): + """Test that get prefers uncompressed file when both exist.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create both compressed and uncompressed files with different content + uncompressed_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + compressed_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json.gz" + + with open(uncompressed_path, "w") as f: + json.dump({"uuid": sample_vcon.uuid, "marker": "uncompressed"}, f) + + with gzip.open(compressed_path, "wt") as f: + json.dump({"uuid": sample_vcon.uuid, "marker": "compressed"}, f) + + result = get(sample_vcon.uuid, opts) + assert result["marker"] == "uncompressed" + + +class TestDelete: + """Tests for delete function.""" + + def test_delete_basic(self, temp_storage_dir, sample_vcon): + """Test basic delete operation.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create a test file + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + file_path.write_text(sample_vcon.dumps()) + assert file_path.exists() + + result = delete(sample_vcon.uuid, opts) + assert result is True + assert not file_path.exists() + + def test_delete_compressed(self, temp_storage_dir, sample_vcon): + """Test delete with compressed file.""" + opts = {"path": temp_storage_dir, "compression": True} + + # Create a compressed test file + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json.gz" + with gzip.open(file_path, "wt") as f: + f.write(sample_vcon.dumps()) + assert file_path.exists() + + result = delete(sample_vcon.uuid, opts) + assert result is True + assert not file_path.exists() + + def test_delete_not_found(self, temp_storage_dir): + """Test delete returns False for non-existent file.""" + opts = {"path": temp_storage_dir, "compression": False} + result = delete("nonexistent-uuid", opts) + assert result is False + + def test_delete_cleans_empty_directories(self, temp_storage_dir, sample_vcon): + """Test that delete cleans up empty parent directories.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create a date-organized test file + date_path = Path(temp_storage_dir) / "2024" / "03" / "15" + date_path.mkdir(parents=True) + file_path = date_path / f"{sample_vcon.uuid}.json" + file_path.write_text(sample_vcon.dumps()) + + result = delete(sample_vcon.uuid, opts) + assert result is True + + # Empty date directories should be cleaned up + assert not (Path(temp_storage_dir) / "2024" / "03" / "15").exists() + assert not (Path(temp_storage_dir) / "2024" / "03").exists() + assert not (Path(temp_storage_dir) / "2024").exists() + + +class TestExists: + """Tests for exists function.""" + + def test_exists_true(self, temp_storage_dir, sample_vcon): + """Test exists returns True for existing file.""" + opts = {"path": temp_storage_dir, "compression": False} + + # Create a test file + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + file_path.write_text(sample_vcon.dumps()) + + assert exists(sample_vcon.uuid, opts) is True + + def test_exists_false(self, temp_storage_dir): + """Test exists returns False for non-existent file.""" + opts = {"path": temp_storage_dir, "compression": False} + assert exists("nonexistent-uuid", opts) is False + + +class TestListVcons: + """Tests for list_vcons function.""" + + def test_list_vcons_empty(self, temp_storage_dir): + """Test list_vcons returns empty list for empty directory.""" + opts = {"path": temp_storage_dir} + result = list_vcons(opts) + assert result == [] + + def test_list_vcons_basic(self, temp_storage_dir): + """Test list_vcons returns all UUIDs.""" + opts = {"path": temp_storage_dir} + + # Create some test files + uuids = ["uuid-1", "uuid-2", "uuid-3"] + for uuid in uuids: + (Path(temp_storage_dir) / f"{uuid}.json").write_text("{}") + + result = list_vcons(opts) + assert len(result) == 3 + assert set(result) == set(uuids) + + def test_list_vcons_with_pagination(self, temp_storage_dir): + """Test list_vcons respects limit and offset.""" + opts = {"path": temp_storage_dir} + + # Create test files + for i in range(5): + (Path(temp_storage_dir) / f"uuid-{i}.json").write_text("{}") + # Ensure different modification times + import time + time.sleep(0.01) + + result = list_vcons(opts, limit=2, offset=1) + assert len(result) == 2 + + def test_list_vcons_includes_compressed(self, temp_storage_dir): + """Test list_vcons includes compressed files.""" + opts = {"path": temp_storage_dir} + + # Create both compressed and uncompressed files + (Path(temp_storage_dir) / "uuid-1.json").write_text("{}") + with gzip.open(Path(temp_storage_dir) / "uuid-2.json.gz", "wt") as f: + f.write("{}") + + result = list_vcons(opts) + assert len(result) == 2 + assert "uuid-1" in result + assert "uuid-2" in result + + def test_list_vcons_from_nested_dirs(self, temp_storage_dir): + """Test list_vcons finds files in nested directories.""" + opts = {"path": temp_storage_dir} + + # Create files in nested directories + nested = Path(temp_storage_dir) / "2024" / "03" / "15" + nested.mkdir(parents=True) + (nested / "uuid-nested.json").write_text("{}") + (Path(temp_storage_dir) / "uuid-flat.json").write_text("{}") + + result = list_vcons(opts) + assert len(result) == 2 + assert "uuid-nested" in result + assert "uuid-flat" in result + + +class TestCleanupEmptyDirs: + """Tests for _cleanup_empty_dirs helper function.""" + + def test_cleanup_removes_empty_dirs(self, temp_storage_dir): + """Test that empty directories are removed.""" + base_path = Path(temp_storage_dir) + nested_path = base_path / "a" / "b" / "c" + nested_path.mkdir(parents=True) + + _cleanup_empty_dirs(nested_path, base_path) + + assert not (base_path / "a" / "b" / "c").exists() + assert not (base_path / "a" / "b").exists() + assert not (base_path / "a").exists() + + def test_cleanup_stops_at_non_empty_dir(self, temp_storage_dir): + """Test that cleanup stops at non-empty directories.""" + base_path = Path(temp_storage_dir) + nested_path = base_path / "a" / "b" / "c" + nested_path.mkdir(parents=True) + + # Add a file to middle directory + (base_path / "a" / "b" / "other.txt").write_text("content") + + _cleanup_empty_dirs(nested_path, base_path) + + assert not (base_path / "a" / "b" / "c").exists() + assert (base_path / "a" / "b").exists() + + def test_cleanup_stops_at_base_path(self, temp_storage_dir): + """Test that cleanup doesn't remove the base path.""" + base_path = Path(temp_storage_dir) + + _cleanup_empty_dirs(base_path, base_path) + + assert base_path.exists() + + +class TestFindVconFile: + """Tests for _find_vcon_file helper function.""" + + def test_find_in_flat_structure(self, temp_storage_dir, sample_vcon): + """Test finding file in flat structure.""" + opts = {"path": temp_storage_dir, "compression": False} + + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json" + file_path.write_text(sample_vcon.dumps()) + + result = _find_vcon_file(sample_vcon.uuid, opts) + assert result == file_path + + def test_find_in_nested_structure(self, temp_storage_dir, sample_vcon): + """Test finding file in nested directory.""" + opts = {"path": temp_storage_dir, "compression": False} + + nested = Path(temp_storage_dir) / "2024" / "01" / "01" + nested.mkdir(parents=True) + file_path = nested / f"{sample_vcon.uuid}.json" + file_path.write_text(sample_vcon.dumps()) + + result = _find_vcon_file(sample_vcon.uuid, opts) + assert result == file_path + + def test_find_compressed_file(self, temp_storage_dir, sample_vcon): + """Test finding compressed file.""" + opts = {"path": temp_storage_dir, "compression": True} + + file_path = Path(temp_storage_dir) / f"{sample_vcon.uuid}.json.gz" + with gzip.open(file_path, "wt") as f: + f.write(sample_vcon.dumps()) + + result = _find_vcon_file(sample_vcon.uuid, opts) + assert result == file_path + + def test_find_not_found(self, temp_storage_dir): + """Test that None is returned when file not found.""" + opts = {"path": temp_storage_dir, "compression": False} + result = _find_vcon_file("nonexistent-uuid", opts) + assert result is None + + +class TestDefaultOptions: + """Tests for default options.""" + + def test_default_options_structure(self): + """Test that default options have required keys.""" + assert "path" in default_options + assert "organize_by_date" in default_options + assert "compression" in default_options + assert "max_file_size" in default_options + assert "file_permissions" in default_options + assert "dir_permissions" in default_options + + def test_default_path(self): + """Test default path value.""" + assert default_options["path"] == "/data/vcons" + + def test_default_max_file_size(self): + """Test default max file size is 10MB.""" + assert default_options["max_file_size"] == 10485760 From 3c0fc55e8907391c051466909a18f1a235057207 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 10 Dec 2025 20:26:08 +0000 Subject: [PATCH 2/8] Fix: Improve vCon file size error message Co-authored-by: thomas.howe --- server/storage/file/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index 06ee4b2..2794083 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -114,9 +114,10 @@ def save(vcon_uuid: str, opts: dict = None) -> None: # Check file size limit max_size = opts.get("max_file_size", default_options["max_file_size"]) - if len(vcon_data.encode("utf-8")) > max_size: + vcon_size_bytes = len(vcon_data.encode("utf-8")) + if vcon_size_bytes > max_size: raise ValueError( - f"vCon data exceeds max file size: {len(vcon_data)} > {max_size}" + f"vCon data exceeds max file size: {vcon_size_bytes} bytes > {max_size} bytes" ) # Get the file path From 55e07965dc04c4835c121769429920894414431c Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 10 Dec 2025 20:29:15 +0000 Subject: [PATCH 3/8] Refactor: Use octal literal for file and dir permissions Co-authored-by: thomas.howe --- example_config.yml | 4 ++-- server/storage/file/README.md | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/example_config.yml b/example_config.yml index 279effd..0ae27dc 100644 --- a/example_config.yml +++ b/example_config.yml @@ -112,8 +112,8 @@ storages: organize_by_date: true compression: false max_file_size: 10485760 - file_permissions: 0644 - dir_permissions: 0755 + file_permissions: 0o644 + dir_permissions: 0o755 mongo: module: storage.mongo options: diff --git a/server/storage/file/README.md b/server/storage/file/README.md index f1b9153..355a27f 100644 --- a/server/storage/file/README.md +++ b/server/storage/file/README.md @@ -19,8 +19,8 @@ storages: organize_by_date: true # Store in YYYY/MM/DD subdirectories compression: false # Enable gzip compression max_file_size: 10485760 # Max file size in bytes (10MB) - file_permissions: 0644 # Unix file permissions (octal) - dir_permissions: 0755 # Unix directory permissions (octal) + file_permissions: 0o644 # Unix file permissions (octal) + dir_permissions: 0o755 # Unix directory permissions (octal) ``` ### Configuration Options @@ -31,8 +31,8 @@ storages: | `organize_by_date` | boolean | `true` | Organize files in YYYY/MM/DD subdirectories based on vCon creation date | | `compression` | boolean | `false` | Enable gzip compression (files saved as `.json.gz`) | | `max_file_size` | integer | `10485760` | Maximum file size in bytes (10MB default) | -| `file_permissions` | integer | `0644` | Unix permissions for created files | -| `dir_permissions` | integer | `0755` | Unix permissions for created directories | +| `file_permissions` | integer | `0o644` | Unix permissions for created files | +| `dir_permissions` | integer | `0o755` | Unix permissions for created directories | ## Features @@ -147,7 +147,7 @@ List vCon UUIDs in storage with pagination support. Returns UUIDs sorted by modi 1. **Use compression** for large vCons to save disk space 2. **Enable date organization** for easier manual browsing and archival -3. **Set appropriate permissions** for security (default 0644 for files) +3. **Set appropriate permissions** for security (default 0o644 for files) 4. **Monitor disk space** - implement cleanup policies for old files 5. **Configure volume mounts** in Docker for data persistence 6. **Set reasonable file size limits** to prevent runaway storage From 0f80b931d048df178e060c6e19ac799c646e97ae Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:30:15 -0500 Subject: [PATCH 4/8] Update server/storage/file/test_file_storage.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/storage/file/test_file_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/storage/file/test_file_storage.py b/server/storage/file/test_file_storage.py index ddc1315..90d3bf1 100644 --- a/server/storage/file/test_file_storage.py +++ b/server/storage/file/test_file_storage.py @@ -16,7 +16,6 @@ import tempfile from pathlib import Path from unittest.mock import patch, MagicMock -from datetime import datetime from server.storage.file import ( save, From 748e4db121e6b5fbf2ff76eefd8c62d1934881b7 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:30:37 -0500 Subject: [PATCH 5/8] Update server/storage/file/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/storage/file/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index 2794083..f206e36 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -271,8 +271,9 @@ def list_vcons(opts: dict = None, limit: int = 100, offset: int = 0) -> list[str try: # Find all vCon files - pattern = str(base_path / "**" / "*.json*") - all_files = glob(pattern, recursive=True) + pattern_json = str(base_path / "**" / "*.json") + pattern_json_gz = str(base_path / "**" / "*.json.gz") + all_files = glob(pattern_json, recursive=True) + glob(pattern_json_gz, recursive=True) # Extract UUIDs from filenames uuids = [] From 2c5c14c6e83255721e31fd8d938326a42753b349 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:30:52 -0500 Subject: [PATCH 6/8] Update server/storage/file/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/storage/file/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index f206e36..32a4282 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -55,9 +55,17 @@ def _get_file_path(vcon_uuid: str, opts: dict, created_at: Optional[str] = None) def _ensure_directory(file_path: Path, dir_permissions: int) -> None: """Ensure the parent directory exists with proper permissions.""" file_path.parent.mkdir(parents=True, exist_ok=True) + # Apply permissions to all directories in the path up to the base try: + # Walk from the base path up to the parent directory + base_path = file_path.anchor if file_path.is_absolute() else Path('.') + for parent in file_path.parent.relative_to(base_path).parents: + dir_to_chmod = base_path / parent + if dir_to_chmod.exists(): + os.chmod(dir_to_chmod, dir_permissions) + # Also chmod the immediate parent directory os.chmod(file_path.parent, dir_permissions) - except OSError: + except Exception: pass # May fail on some systems, not critical From 251473b7261ec55485f3e36559b8dd55d3f69f84 Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:31:25 -0500 Subject: [PATCH 7/8] Update server/storage/file/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/storage/file/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index 32a4282..526cf23 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -12,7 +12,6 @@ import os import json import gzip -import shutil from glob import glob from pathlib import Path from typing import Optional From 28010fba24667223fed2d0540ecf3c756925ad9c Mon Sep 17 00:00:00 2001 From: Thomas Howe Date: Wed, 10 Dec 2025 15:31:41 -0500 Subject: [PATCH 8/8] Update server/storage/file/__init__.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/storage/file/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/storage/file/__init__.py b/server/storage/file/__init__.py index 526cf23..0b559bb 100644 --- a/server/storage/file/__init__.py +++ b/server/storage/file/__init__.py @@ -181,7 +181,7 @@ def get(vcon_uuid: str, opts: dict = None) -> Optional[dict]: return None # Read the file (handle both compressed and uncompressed) - if file_path.suffix == ".gz" or str(file_path).endswith(".json.gz"): + if str(file_path).endswith(".json.gz"): with gzip.open(file_path, "rt", encoding="utf-8") as f: data = json.load(f) else: