diff --git a/BUILD b/BUILD index dbb881f..cd14d0d 100644 --- a/BUILD +++ b/BUILD @@ -1 +1 @@ -7d6fece +8ebb9b4 diff --git a/VERSION b/VERSION index 2e0e38c..c044b1a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.9 +1.10 diff --git a/docs/conf.py b/docs/conf.py index a56ce9e..3feebf0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,6 +47,7 @@ "sphinx_autodoc_typehints", "myst_parser", "sphinxcontrib.mermaid", + "user_workspaces_server.sphinx_ext.json_schema_autodoc", ] templates_path = ["_templates"] diff --git a/docs/controllers.rst b/docs/controllers.rst index 7691ab9..c620ee2 100644 --- a/docs/controllers.rst +++ b/docs/controllers.rst @@ -114,5 +114,6 @@ Adding New Controllers 1. Implement the appropriate abstract base class 2. Add class name mapping in ``utils.translate_class_to_module()`` -3. Update configuration JSON files to register the new controller -4. The system will automatically discover and load the controller at startup \ No newline at end of file +3. Add new JSON schema in ``schemas`` directory +4. Update configuration JSON files to register the new controller +5. The system will automatically discover and load the controller at startup \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 8d1080b..cff948e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -18,6 +18,12 @@ User Workspaces Server is a Django-based REST API that provides workspace and jo api modules +.. toctree:: + :maxdepth: 2 + :caption: Configuration: + + config_reference + .. toctree:: :maxdepth: 1 :caption: Development: diff --git a/src/tests/unittests/test_json_schema_validation.py b/src/tests/unittests/test_json_schema_validation.py new file mode 100644 index 0000000..5422390 --- /dev/null +++ b/src/tests/unittests/test_json_schema_validation.py @@ -0,0 +1,250 @@ +""" +Tests for JSON Schema-based configuration validation system. + +Tests schema loading, validation logic, and error reporting +for config.json using JSON schemas. 
class JSONSchemaConfigValidatorTests(TestCase):
    """Exercise ``validate_uws_config`` against a minimal, known-good config."""

    def setUp(self):
        """Create a fresh validator and the baseline configuration."""
        self.validator = JSONSchemaConfigValidator()

        # Smallest configuration that is expected to validate cleanly; each
        # failure test below breaks exactly one aspect of a deep copy of it.
        authentication = {
            "main_auth": {
                "name": "Local Auth",
                "user_authentication_type": "LocalUserAuthentication",
                "connection_details": {"operating_system": "linux"},
            }
        }
        storage = {
            "main_storage": {
                "name": "Local Storage",
                "storage_type": "LocalFileSystemStorage",
                "user_authentication": "main_auth",
                "root_dir": "/tmp/workspaces",
            }
        }
        resources = {
            "main_resource": {
                "name": "Local Resource",
                "resource_type": "LocalResource",
                "storage": "main_storage",
                "user_authentication": "main_auth",
                "passthrough_domain": "127.0.0.1:8000",
                "connection_details": {},
            }
        }
        job_types = {
            "test_job": {
                "name": "Test Job",
                "job_type": "LocalTestJob",
                "environment_details": {},
            }
        }
        self.valid_config = {
            "api_user_authentication": "main_auth",
            "main_storage": "main_storage",
            "main_resource": "main_resource",
            "available_user_authentication": authentication,
            "available_storage": storage,
            "available_resources": resources,
            "available_job_types": job_types,
            "parameters": [],
        }

    def _mutable_config(self):
        """Return an independent copy of the baseline config to break."""
        return copy.deepcopy(self.valid_config)

    def _errors_for(self, config):
        """Validate *config*, require a ValidationError, and return its errors."""
        with self.assertRaises(ValidationError) as context:
            self.validator.validate_uws_config(config)
        return context.exception.errors

    def test_valid_config_passes(self):
        """The untouched baseline configuration validates."""
        self.assertTrue(self.validator.validate_uws_config(self.valid_config))

    def test_missing_top_level_field_fails(self):
        """Dropping a required top-level key is reported by name."""
        broken = self._mutable_config()
        del broken["api_user_authentication"]

        self.assertIn(
            "Missing required top-level field: api_user_authentication",
            self._errors_for(broken),
        )

    def test_invalid_reference_fails(self):
        """A main reference pointing at an undefined entry is rejected."""
        broken = self._mutable_config()
        broken["api_user_authentication"] = "nonexistent_auth"

        reported = str(self._errors_for(broken))
        self.assertIn("not found in available_user_authentication", reported)

    def test_missing_controller_type_fails(self):
        """An entry without its controller type field is rejected."""
        broken = self._mutable_config()
        del broken["available_resources"]["main_resource"]["resource_type"]

        reported = str(self._errors_for(broken))
        self.assertIn("Missing 'resource_type'", reported)

    def test_unknown_controller_type_fails(self):
        """A controller type with no registered schema is rejected."""
        broken = self._mutable_config()
        broken["available_resources"]["main_resource"]["resource_type"] = "UnknownResource"

        reported = str(self._errors_for(broken))
        self.assertIn("Unknown resource_type", reported)

    def test_missing_required_field_fails(self):
        """A schema-required field missing from an entry is rejected."""
        broken = self._mutable_config()
        del broken["available_storage"]["main_storage"]["root_dir"]

        reported = str(self._errors_for(broken))
        self.assertIn("Missing required field 'root_dir'", reported)

    def test_invalid_storage_reference_fails(self):
        """A resource referencing an undefined storage entry is rejected."""
        broken = self._mutable_config()
        broken["available_resources"]["main_resource"]["storage"] = "nonexistent_storage"

        reported = str(self._errors_for(broken))
        self.assertIn("not found in available_storage", reported)


class JSONSchemaLoaderTests(TestCase):
    """Checks on what the shared schema loader discovered on disk."""

    def setUp(self):
        """Fetch the process-wide schema loader."""
        self.schema_loader = get_schema_loader()

    def _assert_schema_identity(self, schema, controller_name, category):
        """Assert that *schema* exists and carries the expected name/category."""
        self.assertIsNotNone(schema)
        self.assertEqual(schema.controller_name, controller_name)
        self.assertEqual(schema.category, category)

    def test_resource_schemas_loaded(self):
        """The LocalResource schema is registered as a resource."""
        found = self.schema_loader.get_resource_schema("LocalResource")
        self._assert_schema_identity(found, "LocalResource", "resource")

    def test_storage_schemas_loaded(self):
        """The LocalFileSystemStorage schema is registered as storage."""
        found = self.schema_loader.get_storage_schema("LocalFileSystemStorage")
        self._assert_schema_identity(found, "LocalFileSystemStorage", "storage")

    def test_authentication_schemas_loaded(self):
        """The GlobusUserAuthentication schema is registered as authentication."""
        found = self.schema_loader.get_authentication_schema("GlobusUserAuthentication")
        self._assert_schema_identity(found, "GlobusUserAuthentication", "authentication")

    def test_job_type_schemas_loaded(self):
        """The JupyterLabJob schema is registered as a job type."""
        found = self.schema_loader.get_job_type_schema("JupyterLabJob")
        self._assert_schema_identity(found, "JupyterLabJob", "job_type")

    def test_get_all_schemas(self):
        """``get_all_schemas`` exposes every category, each non-empty."""
        everything = self.schema_loader.get_all_schemas()
        categories = ("resources", "storage", "authentication", "job_types")

        for category in categories:
            self.assertIn(category, everything)

        # Every category ships at least one schema file.
        for category in categories:
            self.assertGreater(len(everything[category]), 0)

    def test_schema_has_required_fields(self):
        """Required fields of LocalFileSystemStorage are reported as required."""
        wrapper = self.schema_loader.get_storage_schema("LocalFileSystemStorage")
        required = wrapper.get_required_fields()

        for field_name in ("name", "storage_type", "user_authentication", "root_dir"):
            self.assertIn(field_name, required)

    def test_schema_has_optional_fields(self):
        """Non-required properties of LocalResource are reported as optional."""
        wrapper = self.schema_loader.get_resource_schema("LocalResource")
        optional = wrapper.get_optional_fields()

        for field_name in ("passthrough_domain", "parameter_mapping", "connection_details"):
            self.assertIn(field_name, optional)


class SpecificControllerTests(TestCase):
    """Spot checks on individual controller schemas."""

    def setUp(self):
        """Create a validator and fetch the process-wide schema loader."""
        self.validator = JSONSchemaConfigValidator()
        self.schema_loader = get_schema_loader()

    def test_slurm_resource_requires_connection_details(self):
        """A SlurmAPIResource entry without connection_details is invalid."""
        slurm_schema = self.schema_loader.get_resource_schema("SlurmAPIResource")
        self.assertIsNotNone(slurm_schema)

        # connection_details is deliberately omitted.
        incomplete = {
            "name": "SLURM Resource",
            "resource_type": "SlurmAPIResource",
            "storage": "main_storage",
            "user_authentication": "main_auth",
        }

        outcome = self.validator.validate_controller_config(
            incomplete, slurm_schema, "slurm_resource"
        )
        self.assertFalse(outcome)
        mentions = ["connection_details" in message for message in self.validator.errors]
        self.assertTrue(any(mentions))

    def test_globus_auth_validates_authentication_type_choices(self):
        """GlobusUserAuthentication restricts authentication_type to an enum."""
        globus_schema = self.schema_loader.get_authentication_schema("GlobusUserAuthentication")
        self.assertIsNotNone(globus_schema)

        # The enum lives on the nested connection_details.authentication_type.
        details = globus_schema.get_field("connection_details")
        self.assertIsNotNone(details)
        auth_type = details.get("properties", {}).get("authentication_type")
        self.assertIsNotNone(auth_type)
        self.assertIn("enum", auth_type)
        for choice in ("oauth", "token"):
            self.assertIn(choice, auth_type["enum"])

    def test_jupyter_lab_job_requires_environment_details(self):
        """JupyterLabJob lists environment_details among its required fields."""
        jupyter_schema = self.schema_loader.get_job_type_schema("JupyterLabJob")
        self.assertIsNotNone(jupyter_schema)

        self.assertIn("environment_details", jupyter_schema.get_required_fields())
class JSONSchemaWrapper:
    """Convenience view over one parsed JSON Schema document.

    The wrapper only reads well-known top-level keys of the schema; it does
    not validate that the document is itself a well-formed JSON Schema.
    """

    def __init__(self, schema: Dict[str, Any], file_path: str):
        """
        Initialize a JSONSchemaWrapper.

        Args:
            schema: The parsed JSON Schema dictionary
            file_path: Path to the schema file
        """
        self.schema = schema
        self.file_path = file_path
        # "$id" wins over "title"; an empty string means the schema is unnamed.
        self.controller_name = schema.get("$id", schema.get("title", ""))
        self.category = schema.get("x-category", "")
        self.description = schema.get("description", "")
        self.properties = schema.get("properties", {})
        self.required_fields = schema.get("required", [])
        self.examples = schema.get("examples", [])

    @property
    def fields(self) -> Dict[str, Dict[str, Any]]:
        """Get all fields (properties) from the schema."""
        return self.properties

    def get_required_fields(self) -> List[str]:
        """Get a copy of the list of required field names."""
        return list(self.required_fields)

    def get_optional_fields(self) -> List[str]:
        """Get the names of properties that are not listed as required."""
        return [name for name in self.properties if name not in self.required_fields]

    def get_field(self, field_name: str) -> Optional[Dict[str, Any]]:
        """
        Get a specific field definition.

        Args:
            field_name: Name of the field

        Returns:
            Field definition dict or None if not found
        """
        return self.properties.get(field_name)

    @property
    def example(self) -> Dict[str, Any]:
        """Get the first example (for backwards compatibility with YAML schemas)."""
        return self.examples[0] if self.examples else {}


class JSONSchemaLoader:
    """Loads and manages JSON Schema files."""

    # Sub-directories of ``schemas_dir`` that are scanned, and the keys of the
    # mapping returned by ``get_all_schemas``.
    CATEGORIES = ("resources", "storage", "authentication", "job_types")

    def __init__(self, schemas_dir: Optional[str] = None):
        """
        Initialize the JSON Schema loader and load every schema immediately.

        Args:
            schemas_dir: Path (string or ``pathlib.Path``) to the directory
                containing JSON schemas. Defaults to the ``schemas/``
                subdirectory of this module.
        """
        if schemas_dir is None:
            schemas_dir = Path(__file__).parent / "schemas"

        self.schemas_dir = Path(schemas_dir)
        self._schemas: Dict[str, Dict[str, JSONSchemaWrapper]] = {
            category: {} for category in self.CATEGORIES
        }
        self._load_all_schemas()

    def _load_json_file(self, file_path: Path) -> JSONSchemaWrapper:
        """
        Load a single JSON Schema file.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the file is not valid UTF-8 JSON, or its top-level
                value is not a JSON object.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, "r", encoding="utf-8") as f:
            schema_data = json.load(f)

        if not isinstance(schema_data, dict):
            raise ValueError(
                f"top-level JSON value must be an object, got {type(schema_data).__name__}"
            )

        return JSONSchemaWrapper(schema_data, str(file_path))

    def _load_all_schemas(self):
        """
        Load all JSON Schema files from the schemas directory.

        Loading is best-effort: a file that cannot be read or parsed, or that
        does not declare a usable name, is skipped with a logged warning so
        one bad schema does not prevent the others from loading.
        """
        import logging  # local import: keeps this block self-contained

        logger = logging.getLogger(__name__)

        for category in self.CATEGORIES:
            category_dir = self.schemas_dir / category
            if not category_dir.is_dir():
                continue

            # Sorted so that, if two files declare the same name, the winner
            # does not depend on filesystem enumeration order.
            for json_file in sorted(category_dir.glob("*.json")):
                try:
                    schema = self._load_json_file(json_file)
                except (OSError, ValueError) as e:
                    logger.warning("Failed to load schema %s: %s", json_file, e)
                    continue

                name = schema.controller_name
                if not isinstance(name, str) or not name:
                    # Such a schema could never be looked up by controller type.
                    logger.warning(
                        "Skipping schema %s: no usable '$id' or 'title'", json_file
                    )
                    continue

                self._schemas[category][name] = schema

    def get_schema(self, category: str, controller_name: str) -> Optional[JSONSchemaWrapper]:
        """
        Get a schema by category and controller name.

        Args:
            category: Schema category (resources, storage, authentication, job_types)
            controller_name: Name of the controller

        Returns:
            JSONSchemaWrapper object or None if not found
        """
        return self._schemas.get(category, {}).get(controller_name)

    def get_all_schemas(self) -> Dict[str, Dict[str, JSONSchemaWrapper]]:
        """
        Get all loaded schemas organized by category.

        Both the outer mapping and each per-category mapping are copies, so
        adding or removing entries in the result does not affect the loader.
        The wrapper objects themselves are shared.
        """
        return {category: dict(schemas) for category, schemas in self._schemas.items()}

    def get_resource_schema(self, controller_name: str) -> Optional[JSONSchemaWrapper]:
        """Get a resource schema by controller name."""
        return self.get_schema("resources", controller_name)

    def get_storage_schema(self, controller_name: str) -> Optional[JSONSchemaWrapper]:
        """Get a storage schema by controller name."""
        return self.get_schema("storage", controller_name)

    def get_authentication_schema(self, controller_name: str) -> Optional[JSONSchemaWrapper]:
        """Get an authentication schema by controller name."""
        return self.get_schema("authentication", controller_name)

    def get_job_type_schema(self, controller_name: str) -> Optional[JSONSchemaWrapper]:
        """Get a job type schema by controller name."""
        return self.get_schema("job_types", controller_name)


# Global schema loader instance, created lazily by get_schema_loader().
_schema_loader = None


def get_schema_loader() -> JSONSchemaLoader:
    """Get the global schema loader instance, creating it on first use."""
    global _schema_loader
    if _schema_loader is None:
        _schema_loader = JSONSchemaLoader()
    return _schema_loader
class ValidationError(Exception):
    """Exception raised when configuration validation fails.

    Attributes:
        errors: The individual human-readable error messages.
    """

    def __init__(self, errors: List[str]):
        self.errors = errors
        message = "Configuration validation failed:\n" + "\n".join(f" - {e}" for e in errors)
        super().__init__(message)


class JSONSchemaConfigValidator:
    """Validates configuration dictionaries against JSON schemas.

    Error messages accumulate in ``self.errors``; ``validate_uws_config``
    resets the list, the lower-level ``validate_*`` methods only append.
    """

    # Keys that must be present at the top level of config.json.
    _TOP_LEVEL_FIELDS = (
        "api_user_authentication",
        "main_storage",
        "main_resource",
        "available_user_authentication",
        "available_storage",
        "available_resources",
        "available_job_types",
        "parameters",
    )

    # (top-level field, section its value must be a key of)
    _MAIN_REFERENCES = (
        ("api_user_authentication", "available_user_authentication"),
        ("main_storage", "available_storage"),
        ("main_resource", "available_resources"),
    )

    # (section, controller type field, schema loader getter,
    #  ((reference field, section it must point into), ...)), in validation order.
    _SECTIONS = (
        ("available_user_authentication", "user_authentication_type",
         "get_authentication_schema", ()),
        ("available_storage", "storage_type", "get_storage_schema",
         (("user_authentication", "available_user_authentication"),)),
        ("available_resources", "resource_type", "get_resource_schema",
         (("storage", "available_storage"),
          ("user_authentication", "available_user_authentication"))),
        ("available_job_types", "job_type", "get_job_type_schema", ()),
    )

    def __init__(self):
        """Initialize the validator."""
        self.errors: List[str] = []
        self.schema_loader = get_schema_loader()

    def _format_validation_error(
        self, error: "jsonschema.ValidationError", path: str = ""
    ) -> str:
        """
        Format a jsonschema validation error into a human-readable message.

        Args:
            error: The validation error from jsonschema
            path: The current path in the config (for nested validation)

        Returns:
            ``"<path>: <detail>"``, or just ``"<detail>"`` when there is no
            path to report.
        """
        field_path = ".".join(str(p) for p in error.absolute_path) if error.absolute_path else ""
        full_path = ".".join(part for part in (path, field_path) if part)

        detail = error.message
        if error.validator == "required":
            # The message is expected to look like "'<name>' is a required
            # property"; any other wording is passed through unchanged rather
            # than raising while formatting.
            suffix = " is a required property"
            quoted = detail[: -len(suffix)] if detail.endswith(suffix) else ""
            if len(quoted) >= 2:
                detail = f"Missing required field '{quoted[1:-1]}'"

        return f"{full_path}: {detail}" if full_path else detail

    def validate_with_schema(
        self,
        config: Dict[str, Any],
        schema_dict: Dict[str, Any],
        path: str = "",
    ) -> bool:
        """
        Validate a configuration dictionary against a JSON schema.

        Every violation found is appended to ``self.errors``.

        Args:
            config: The configuration dictionary to validate
            schema_dict: The JSON Schema to validate against
            path: The current path in the config (for error messages)

        Returns:
            True if validation passes, False otherwise
        """
        validator = Draft7Validator(schema_dict)
        messages = [
            self._format_validation_error(error, path) for error in validator.iter_errors(config)
        ]
        self.errors.extend(messages)
        return not messages

    def validate_controller_config(
        self, config: Dict[str, Any], schema_wrapper, config_key: str = ""
    ) -> bool:
        """
        Validate a controller configuration against its schema.

        Args:
            config: The configuration dictionary to validate
            schema_wrapper: The JSONSchemaWrapper to validate against
            config_key: The key in the config file (for error messages);
                falls back to the schema's controller name when empty

        Returns:
            True if validation passes, False otherwise
        """
        path = config_key if config_key else schema_wrapper.controller_name
        return self.validate_with_schema(config, schema_wrapper.schema, path)

    @staticmethod
    def _contains(section: Dict[str, Any], reference: Any) -> bool:
        """Return whether *reference* is a key of *section*; unhashable values are not."""
        try:
            return reference in section
        except TypeError:
            return False

    def _validate_entry(
        self,
        location: str,
        entry: Any,
        type_field: str,
        get_schema,
        references,
        sections: Dict[str, Dict[str, Any]],
    ) -> bool:
        """Validate one named entry of a section: type field, references, then schema."""
        if not isinstance(entry, dict):
            self.errors.append(f"{location}: Expected an object")
            return False

        controller_type = entry.get(type_field)
        if not controller_type:
            self.errors.append(f"{location}: Missing '{type_field}'")
            return False

        references_ok = True
        for ref_field, target in references:
            reference = entry.get(ref_field)
            # An absent/empty reference is left for the schema to report.
            if reference and not self._contains(sections[target], reference):
                self.errors.append(f"{location}: {ref_field} '{reference}' not found in {target}")
                references_ok = False

        # Only strings can name a schema; anything else is reported as unknown.
        schema = get_schema(controller_type) if isinstance(controller_type, str) else None
        if not schema:
            self.errors.append(f"{location}: Unknown {type_field} '{controller_type}'")
            return False

        return self.validate_controller_config(entry, schema, location) and references_ok

    def validate_uws_config(self, config: Dict[str, Any]) -> bool:
        """
        Validate the complete UWS config.json file.

        Checks, in order: presence of the required top-level fields (raising
        immediately if any is missing), that each ``available_*`` section is
        an object, the three main references, and then every entry of the
        authentication, storage, resource and job type sections. Malformed
        shapes are reported as validation errors rather than raising
        ``AttributeError``/``TypeError``.

        Args:
            config: The loaded config.json dictionary

        Returns:
            True if validation passes (failure is signalled by raising)

        Raises:
            ValidationError: If validation fails; ``errors`` lists every problem found
        """
        self.errors = []

        if not isinstance(config, dict):
            self.errors.append("Configuration root must be an object")
            raise ValidationError(self.errors)

        missing = [field for field in self._TOP_LEVEL_FIELDS if field not in config]
        if missing:
            self.errors.extend(f"Missing required top-level field: {field}" for field in missing)
            raise ValidationError(self.errors)

        valid = True

        # Normalise the sections once so later checks can rely on dicts.
        sections: Dict[str, Dict[str, Any]] = {}
        for section_name, _type_field, _getter, _references in self._SECTIONS:
            section = config[section_name]
            if not isinstance(section, dict):
                self.errors.append(
                    f"{section_name}: Expected an object mapping names to configurations"
                )
                valid = False
                section = {}
            sections[section_name] = section

        # Validate main references
        for field, section_name in self._MAIN_REFERENCES:
            if not self._contains(sections[section_name], config[field]):
                self.errors.append(f"{field} '{config[field]}' not found in {section_name}")
                valid = False

        # Validate every entry of every section
        for section_name, type_field, getter_name, references in self._SECTIONS:
            get_schema = getattr(self.schema_loader, getter_name)
            for key, entry in sections[section_name].items():
                entry_ok = self._validate_entry(
                    f"{section_name}.{key}", entry, type_field, get_schema, references, sections
                )
                valid = entry_ok and valid

        if not valid:
            raise ValidationError(self.errors)

        return valid
"type": "object", + "description": "Globus authentication configuration", + "required": ["client_id", "client_secret", "authentication_type", "health_check_url"], + "properties": { + "client_id": { + "type": "string", + "description": "Globus OAuth client ID" + }, + "client_secret": { + "type": "string", + "description": "Globus OAuth client secret" + }, + "authentication_type": { + "type": "string", + "enum": ["oauth", "token"], + "description": "Authentication flow type: 'oauth' for full OAuth flow, 'token' for direct token validation" + }, + "health_check_url": { + "type": "string", + "description": "URL for health check endpoint" + }, + "allowed_globus_groups": { + "type": "array", + "items": { + "type": "string", + "description": "Globus group UUID" + }, + "description": "List of Globus group UUIDs. Users must be members of at least one group to authenticate. If empty, group checking is disabled.", + "default": [] + } + } + } + }, + "examples": [ + { + "name": "Globus Token Auth", + "user_authentication_type": "GlobusUserAuthentication", + "connection_details": { + "client_id": "your-client-id", + "client_secret": "your-client-secret", + "authentication_type": "token" + } + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/authentication/LocalUserAuthentication.json b/src/user_workspaces_server/config_schemas/schemas/authentication/LocalUserAuthentication.json new file mode 100644 index 0000000..2c6e841 --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/authentication/LocalUserAuthentication.json @@ -0,0 +1,45 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "LocalUserAuthentication", + "title": "LocalUserAuthentication", + "description": "Local system user authentication", + "type": "object", + "x-category": "authentication", + "required": ["name", "user_authentication_type", "connection_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this 
authentication method" + }, + "user_authentication_type": { + "type": "string", + "enum": ["LocalUserAuthentication"], + "description": "Must be 'LocalUserAuthentication'" + }, + "connection_details": { + "type": "object", + "default": {}, + "description": "Local authentication configuration (operating_system is required)", + "properties": { + "create_external_users": { + "type": "boolean", + "default": false, + "description": "Whether to create a new user when they don't exist on the system" + }, + "operating_system": { + "type": "string", + "enum": ["linux", "osx"], + "description": "The operating system to which this authentication method is connected" + } + }, + "required": ["operating_system"] + } + }, + "examples": [ + { + "name": "Local Auth", + "user_authentication_type": "LocalUserAuthentication", + "connection_details": {"operating_system": "linux"} + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/authentication/PSCAPIUserAuthentication.json b/src/user_workspaces_server/config_schemas/schemas/authentication/PSCAPIUserAuthentication.json new file mode 100644 index 0000000..ce7696b --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/authentication/PSCAPIUserAuthentication.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "PSCAPIUserAuthentication", + "title": "PSCAPIUserAuthentication", + "description": "PSC API authentication integration", + "type": "object", + "x-category": "authentication", + "required": ["name", "user_authentication_type", "connection_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this authentication method" + }, + "user_authentication_type": { + "type": "string", + "enum": ["PSCAPIUserAuthentication"], + "description": "Must be 'PSCAPIUserAuthentication'" + }, + "connection_details": { + "type": "object", + "description": "PSC API connection configuration", + "required": [ + "grant_number", + "jwt_token", + "ldap_base", + "ldap_password", 
+ "ldap_uri", + "ldap_user_dn", + "resource_name", + "root_url", + "health_check_url" + ], + "properties": { + "create_external_users": { + "type": "boolean", + "default": false, + "description": "Whether to create a new user when they don't exist on the system" + }, + "grant_number": { + "type": "string", + "description": "Grant number associated with user workspaces server" + }, + "health_check_url": { + "type": "string", + "description": "URL for health check endpoint" + }, + "jwt_token": { + "type": "string", + "description": "JWT authentication token for PSC Users API" + }, + "ldap_base": { + "type": "string", + "description": "Base for PSC LDAP service" + }, + "ldap_password": { + "type": "string", + "description": "Password for PSC LDAP service" + }, + "ldap_uri": { + "type": "string", + "description": "URI for PSC LDAP service" + }, + "ldap_user_dn": { + "type": "string", + "description": "User DN for PSC LDAP service" + }, + "resource_name": { + "type": "string", + "description": "PSC resource associated with user workspaces server" + }, + "root_url": { + "type": "string", + "description": "URL for the PSC Users API" + } + } + } + }, + "examples": [ + { + "name": "PSC API Auth", + "user_authentication_type": "PSCAPIUserAuthentication", + "connection_details": { + "create_external_users": false, + "grant_number": "YOUR_GRANT_NUMBER", + "health_check_url": "https://health_check.com", + "jwt_token": "YOUR_JWT_TOKEN", + "ldap_base": "YOUR_LDAP_BASE", + "ldap_password": "YOUR_LDAP_PASSWORD", + "ldap_uri": "ldaps://ldap_uri", + "ldap_user_dn": "YOUR_LDAP_USER_DN", + "resource_name": "YOUR_RESOURCE_NAME", + "root_url": "https://root_url.com" + } + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/job_types/AppyterJob.json b/src/user_workspaces_server/config_schemas/schemas/job_types/AppyterJob.json new file mode 100644 index 0000000..ecb4702 --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/job_types/AppyterJob.json @@ 
-0,0 +1,42 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "AppyterJob", + "title": "AppyterJob", + "description": "Appyter notebook application job", + "type": "object", + "x-category": "job_type", + "required": ["name", "job_type", "environment_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this job type" + }, + "job_type": { + "type": "string", + "enum": ["AppyterJob"], + "description": "Must be 'AppyterJob'" + }, + "environment_details": { + "type": "object", + "description": "Environment configuration per resource (resource name as key, config as value)", + "required": ["sif_file_path"], + "properties": { + "sif_file_path": { + "type": "string", + "description": "File path for sif image" + } + } + } + }, + "examples": [ + { + "name": "Appyter Job", + "job_type": "AppyterJob", + "environment_details": { + "main_resource": { + "sif_file_path": "/path/to/sif_file.sif" + } + } + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/job_types/JupyterLabJob.json b/src/user_workspaces_server/config_schemas/schemas/job_types/JupyterLabJob.json new file mode 100644 index 0000000..32b2c8b --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/job_types/JupyterLabJob.json @@ -0,0 +1,72 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "JupyterLabJob", + "title": "JupyterLabJob", + "description": "JupyterLab server job with configurable Python environment", + "type": "object", + "x-category": "job_type", + "required": ["name", "job_type", "environment_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this job type" + }, + "job_type": { + "type": "string", + "enum": ["JupyterLabJob"], + "description": "Must be 'JupyterLabJob'" + }, + "environment_details": { + "type": "object", + "description": "Environment configuration per resource (resource name as key, config as value)", + "required": 
["module_manager", "environment_name"], + "properties": { + "module_manager": { + "type": "string", + "enum": ["tar", "lmod", "virtualenv"], + "description": "How this environment is managed. (Tar archive, lmod module, virtual environment)" + }, + "python_version": { + "type": "string", + "description": "Python version to use. Required if using lmod or virtualenv" + }, + "modules": { + "type": "array", + "items": {"type": "string"}, + "description": "Modules to load. Required if using lmod" + }, + "python_packages": { + "type": "array", + "items": {"type": "string"}, + "description": "Python packages to install. Required if using lmod or virtualenv" + }, + "time_limit": { + "type": "string", + "description": "The default time limit for this job type in minutes" + }, + "tar_file_path": { + "type": "string", + "description": "File path for the tar archive. Required if using tar" + }, + "environment_name": { + "type": "string" + } + } + } + }, + "examples": [ + { + "name": "Jupyter Lab", + "job_type": "JupyterLabJob", + "environment_details": { + "main_resource": { + "python_version": "python3.10", + "module_manager": "virtualenv", + "modules": ["jupyterlab"], + "time_limit": "60", + "environment_name": "JupyterLabJob" + } + } + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/job_types/LocalTestJob.json b/src/user_workspaces_server/config_schemas/schemas/job_types/LocalTestJob.json new file mode 100644 index 0000000..0b21e32 --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/job_types/LocalTestJob.json @@ -0,0 +1,34 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "LocalTestJob", + "title": "LocalTestJob", + "description": "Simple test job for development and debugging", + "type": "object", + "x-category": "job_type", + "required": ["name", "job_type"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this job type" + }, + "job_type": { + "type": "string", + 
"enum": ["LocalTestJob"], + "description": "Must be 'LocalTestJob'" + }, + "environment_details": { + "type": "object", + "default": {}, + "description": "Environment configuration per resource (typically empty for test jobs)" + } + }, + "examples": [ + { + "name": "Local Test Job", + "job_type": "LocalTestJob", + "environment_details": { + "main_resource": {} + } + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/resources/LocalResource.json b/src/user_workspaces_server/config_schemas/schemas/resources/LocalResource.json new file mode 100644 index 0000000..a9f747b --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/resources/LocalResource.json @@ -0,0 +1,53 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "LocalResource", + "title": "LocalResource", + "description": "Local machine resource for running jobs via subprocess", + "type": "object", + "x-category": "resource", + "required": ["name", "resource_type", "storage", "user_authentication"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this resource instance" + }, + "resource_type": { + "type": "string", + "enum": ["LocalResource"], + "description": "Must be 'LocalResource'" + }, + "storage": { + "type": "string", + "description": "Reference to storage configuration key in available_storage" + }, + "user_authentication": { + "type": "string", + "description": "Reference to authentication configuration key in available_user_authentication" + }, + "passthrough_domain": { + "type": "string", + "default": "", + "description": "Domain for passthrough connections (e.g., '127.0.0.1:8000')" + }, + "parameter_mapping": { + "type": "object", + "default": {}, + "description": "Maps generic parameter names to resource-specific names" + }, + "connection_details": { + "type": "object", + "default": {}, + "description": "Connection configuration (typically empty for LocalResource)" + } + }, + "examples": [ + { + 
"name": "Local Resource", + "resource_type": "LocalResource", + "storage": "main_storage", + "user_authentication": "main_auth", + "passthrough_domain": "127.0.0.1:8000", + "connection_details": {} + } + ] +} \ No newline at end of file diff --git a/src/user_workspaces_server/config_schemas/schemas/resources/SlurmAPIResource.json b/src/user_workspaces_server/config_schemas/schemas/resources/SlurmAPIResource.json new file mode 100644 index 0000000..2d9baf8 --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/resources/SlurmAPIResource.json @@ -0,0 +1,87 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "SlurmAPIResource", + "title": "SlurmAPIResource", + "description": "SLURM cluster resource using SLURM REST API v0.0.40", + "type": "object", + "x-category": "resource", + "required": ["name", "resource_type", "storage", "user_authentication", "connection_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this resource instance" + }, + "resource_type": { + "type": "string", + "enum": ["SlurmAPIResource"], + "description": "Must be 'SlurmAPIResource'" + }, + "storage": { + "type": "string", + "description": "Reference to storage configuration key in available_storage" + }, + "user_authentication": { + "type": "string", + "description": "Reference to authentication configuration key in available_user_authentication" + }, + "passthrough_domain": { + "type": "string", + "default": "", + "description": "Domain for passthrough connections" + }, + "parameter_mapping": { + "type": "object", + "default": {}, + "description": "Maps generic parameter names to SLURM-specific names (e.g., num_cpus -> cpus_per_task)" + }, + "cpu_partition": { + "type": "string", + "default": "", + "description": "SLURM partition name for CPU jobs" + }, + "gpu_partition": { + "type": "string", + "default": "", + "description": "SLURM partition name for GPU jobs" + }, + "connection_details": { + "type": 
"object", + "description": "SLURM API connection configuration", + "required": ["root_url", "api_token"], + "properties": { + "root_url": { + "type": "string", + "description": "Base URL for SLURM REST API (e.g., 'https://slurm.example.com/api')" + }, + "api_token": { + "type": "string", + "description": "API token for SLURM REST API authentication" + }, + "token_lifespan": { + "type": "string", + "default": "3600", + "description": "Token lifespan in seconds" + }, + "health_check_url": { + "type": "string", + "description": "URL for health check endpoint" + } + } + } + }, + "examples": [ + { + "name": "SLURM Cluster", + "resource_type": "SlurmAPIResource", + "storage": "main_storage", + "user_authentication": "main_auth", + "connection_details": { + "root_url": "https://slurm.example.com/api", + "api_token": "your-api-token-here", + "token_lifespan": "3600" + }, + "cpu_partition": "cpu", + "gpu_partition": "gpu" + } + ] +} \ No newline at end of file diff --git a/src/user_workspaces_server/config_schemas/schemas/storage/HubmapLocalFileSystemStorage.json b/src/user_workspaces_server/config_schemas/schemas/storage/HubmapLocalFileSystemStorage.json new file mode 100644 index 0000000..fa274fb --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/storage/HubmapLocalFileSystemStorage.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "HubmapLocalFileSystemStorage", + "title": "HubmapLocalFileSystemStorage", + "description": "HuBMAP-specific local filesystem storage with custom directory structures", + "type": "object", + "x-category": "storage", + "required": ["name", "storage_type", "user_authentication", "root_dir", "connection_details"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this storage instance" + }, + "storage_type": { + "type": "string", + "enum": ["HubmapLocalFileSystemStorage"], + "description": "Must be 'HubmapLocalFileSystemStorage'" + }, + 
"user_authentication": { + "type": "string", + "description": "Reference to authentication configuration key in available_user_authentication" + }, + "root_dir": { + "type": "string", + "description": "Absolute path to root directory for storing workspace files" + }, + "connection_details": { + "type": "object", + "default": {}, + "description": "Connection configuration", + "required": ["root_url"], + "properties": { + "root_url": { + "type": "string", + "description": "Base URL for HuBMAP API to grab dataset file paths" + } + } + } + }, + "examples": [ + { + "name": "HuBMAP Local Storage", + "storage_type": "HubmapLocalFileSystemStorage", + "user_authentication": "main_auth", + "root_dir": "/var/hubmap/workspaces" + } + ] +} diff --git a/src/user_workspaces_server/config_schemas/schemas/storage/LocalFileSystemStorage.json b/src/user_workspaces_server/config_schemas/schemas/storage/LocalFileSystemStorage.json new file mode 100644 index 0000000..b6c4ba0 --- /dev/null +++ b/src/user_workspaces_server/config_schemas/schemas/storage/LocalFileSystemStorage.json @@ -0,0 +1,41 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "LocalFileSystemStorage", + "title": "LocalFileSystemStorage", + "description": "Local filesystem storage for workspace data", + "type": "object", + "x-category": "storage", + "required": ["name", "storage_type", "user_authentication", "root_dir"], + "properties": { + "name": { + "type": "string", + "description": "Human-readable name for this storage instance" + }, + "storage_type": { + "type": "string", + "enum": ["LocalFileSystemStorage"], + "description": "Must be 'LocalFileSystemStorage'" + }, + "user_authentication": { + "type": "string", + "description": "Reference to authentication configuration key in available_user_authentication" + }, + "root_dir": { + "type": "string", + "description": "Absolute path to root directory for storing workspace files" + }, + "connection_details": { + "type": "object", + "default": {}, + 
"description": "Connection configuration (typically empty for local filesystem)" + } + }, + "examples": [ + { + "name": "Local File Storage", + "storage_type": "LocalFileSystemStorage", + "user_authentication": "main_auth", + "root_dir": "/var/workspaces" + } + ] +} \ No newline at end of file diff --git a/src/user_workspaces_server/controllers/userauthenticationmethods/globus_user_authentication.py b/src/user_workspaces_server/controllers/userauthenticationmethods/globus_user_authentication.py index 2986e00..6a56d90 100644 --- a/src/user_workspaces_server/controllers/userauthenticationmethods/globus_user_authentication.py +++ b/src/user_workspaces_server/controllers/userauthenticationmethods/globus_user_authentication.py @@ -4,7 +4,7 @@ import globus_sdk from flask.wrappers import Response as flask_response from hubmap_commons.hm_auth import AuthHelper -from rest_framework.exceptions import ParseError +from rest_framework.exceptions import ParseError, PermissionDenied from rest_framework.response import Response from user_workspaces_server.controllers.userauthenticationmethods.abstract_user_authentication import ( @@ -21,13 +21,108 @@ def __init__(self, config): client_secret = self.connection_details["client_secret"] self.authentication_type = self.connection_details["authentication_type"] self.oauth = globus_sdk.ConfidentialAppAuthClient(client_id, client_secret) + self.allowed_globus_groups = self.connection_details.get("allowed_globus_groups", []) if not AuthHelper.isInitialized(): self.auth_helper = AuthHelper.create(clientId=client_id, clientSecret=client_secret) else: self.auth_helper = AuthHelper.instance() def has_permission(self, internal_user): - pass + """ + Verify user has permission by checking external user mapping exists + and optionally validating Globus group membership. 
+ + Returns: + ExternalUserMapping on success, False on failure + """ + external_user_mapping = self.get_external_user_mapping( + {"user_id": internal_user, "user_authentication_name": type(self).__name__} + ) + + if not external_user_mapping: + # No mapping exists - user needs to authenticate first + return False + + # If group checking is enabled, validate membership + if self.allowed_globus_groups: + try: + # Extract groups token from stored external_user_details + external_user_details = external_user_mapping.external_user_details or {} + groups_token = external_user_details.get("globus_groups_token") + + if not groups_token: + logger.error( + f"Groups token not found for user {internal_user.username}. " + "User may need to re-authenticate." + ) + return False + + # Check if user is still a member of allowed groups + if not self._check_group_membership( + groups_token, external_user_mapping.external_user_id + ): + logger.warning( + f"User {internal_user.username} is no longer a member of allowed Globus groups." + ) + return False + + except Exception as e: + logger.error( + f"Error checking group membership for {internal_user.username}: {repr(e)}" + ) + return False + + # User has valid mapping and (if required) is in allowed groups + return external_user_mapping + + def _check_group_membership(self, groups_token, user_id): + """ + Check if user is a member of any allowed Globus groups. 
+ + Args: + groups_token: Access token for Globus Groups API + user_id: Globus user ID (sub) + + Returns: + True if user is in at least one allowed group or if no groups configured, False otherwise + """ + if not self.allowed_globus_groups: + # No groups configured - skip check + return True + + try: + # Create GroupsClient with access token + authorizer = globus_sdk.AccessTokenAuthorizer(groups_token) + groups_client = globus_sdk.GroupsClient(authorizer=authorizer) + + # Get user's group memberships + user_groups = groups_client.get_my_groups() + + # Extract group IDs from response + user_group_ids = {group["id"] for group in user_groups} + + # Check if user is in any allowed group (OR logic) + allowed_groups_set = set(self.allowed_globus_groups) + intersection = user_group_ids.intersection(allowed_groups_set) + + if intersection: + logger.info(f"User {user_id} is member of allowed groups: {intersection}") + return True + else: + logger.warning( + f"User {user_id} is not a member of any allowed groups. 
" + f"User groups: {user_group_ids}, Allowed: {allowed_groups_set}" + ) + return False + + except globus_sdk.GlobusAPIError as e: + logger.error(f"Globus API error checking groups for {user_id}: {e.code} - {e.message}") + # Fail closed - deny access on API errors + return False + except Exception as e: + logger.error(f"Unexpected error checking groups for {user_id}: {repr(e)}") + # Fail closed - deny access on unexpected errors + return False def api_authenticate(self, request): try: @@ -55,6 +150,19 @@ def api_authenticate(self, request): } ) + # Check whether the user is part of predefined set of Globus groups every time we log in + if self.allowed_globus_groups: + # For new users, check group membership before creating account + groups_token = globus_user_info.get("globus_groups_token") + if not groups_token: + raise PermissionDenied("Groups token not available for authentication.") + + if not self._check_group_membership(groups_token, globus_user_info["sub"]): + raise PermissionDenied( + "User is not a member of any allowed Globus groups. " + "Please contact your administrator for access." 
+ ) + if not external_user_mapping: # Since its Globus, lets get the username from the email username = globus_user_info["email"].split("@")[0] @@ -73,13 +181,13 @@ def api_authenticate(self, request): } ) - globus_user_info["internal_user_id"] = internal_user self.create_external_user_mapping( { - "user_id": globus_user_info["internal_user_id"], + "user_id": internal_user, "user_authentication_name": type(self).__name__, "external_user_id": globus_user_info["sub"], "external_username": globus_user_info["username"], + "external_user_details": globus_user_info, } ) return internal_user @@ -125,13 +233,23 @@ def globus_oauth_get_user_info(self, body): code = body["code"] tokens = self.oauth.oauth2_exchange_code_for_tokens(code) - # Need to add call here to grab user profile info - return self.introspect_globus_user( - tokens.by_resource_server["groups.api.globus.org"]["access_token"] - ) + # Get user profile info using groups token + groups_token = tokens.by_resource_server["groups.api.globus.org"]["access_token"] + user_info = self.introspect_globus_user(groups_token) + + # Store the groups token for later group membership checking + user_info["globus_groups_token"] = groups_token + + return user_info def globus_token_get_user_info(self, body): if "auth_token" not in body: raise ParseError("Missing auth_token.") - return self.introspect_globus_user(body.get("auth_token")) + auth_token = body.get("auth_token") + user_info = self.introspect_globus_user(auth_token) + + # Store the auth token as groups token for group membership checking + user_info["globus_groups_token"] = auth_token + + return user_info diff --git a/src/user_workspaces_server/sphinx_ext/__init__.py b/src/user_workspaces_server/sphinx_ext/__init__.py new file mode 100644 index 0000000..15092ba --- /dev/null +++ b/src/user_workspaces_server/sphinx_ext/__init__.py @@ -0,0 +1 @@ +"""Sphinx extensions for User Workspaces Server documentation.""" diff --git 
a/src/user_workspaces_server/sphinx_ext/json_schema_autodoc.py b/src/user_workspaces_server/sphinx_ext/json_schema_autodoc.py new file mode 100644 index 0000000..fee9619 --- /dev/null +++ b/src/user_workspaces_server/sphinx_ext/json_schema_autodoc.py @@ -0,0 +1,296 @@ +""" +Sphinx extension for automatic configuration documentation from JSON schemas. + +This extension generates RST documentation from JSON Schema definitions, +keeping documentation in sync with validation rules automatically. +""" + +import json +import os +from typing import Any, Dict + +from sphinx.application import Sphinx +from sphinx.util import logging + +from user_workspaces_server.config_schemas.json_schema_loader import ( + JSONSchemaWrapper, + get_schema_loader, +) + +logger = logging.getLogger(__name__) + + +def generate_field_doc( + field_name: str, field_def: Dict[str, Any], is_required: bool, indent: int = 0 +) -> str: + """ + Generate RST documentation for a single configuration field. + + Args: + field_name: Name of the field + field_def: JSON Schema field definition + is_required: Whether the field is required + indent: Indentation level + + Returns: + RST formatted string documenting the field + """ + indent_str = " " * indent + lines = [] + + # Field name and type + field_type = field_def.get("type", "any") + required_badge = "**[Required]**" if is_required else "*[Optional]*" + lines.append(f"{indent_str}**{field_name}** ({field_type}) {required_badge}") + lines.append("") + + # Description + if "description" in field_def: + lines.append(f"{indent_str} {field_def['description']}") + lines.append("") + + # Default value + if not is_required and "default" in field_def: + default_val = field_def["default"] + if isinstance(default_val, str): + lines.append(f'{indent_str} *Default:* ``"{default_val}"``') + else: + lines.append(f"{indent_str} *Default:* ``{default_val}``") + lines.append("") + + # Enum choices + if "enum" in field_def: + choices_str = ", ".join(f'``"{c}"``' for c in 
field_def["enum"]) + lines.append(f"{indent_str} *Allowed values:* {choices_str}") + lines.append("") + + # Numeric constraints + constraints = [] + if "minimum" in field_def: + constraints.append(f"minimum: {field_def['minimum']}") + if "maximum" in field_def: + constraints.append(f"maximum: {field_def['maximum']}") + if "pattern" in field_def: + constraints.append(f"pattern: ``{field_def['pattern']}``") + + if constraints: + lines.append(f"{indent_str} *Constraints:* {', '.join(constraints)}") + lines.append("") + + # Nested object properties + if field_type == "object" and "properties" in field_def: + lines.append(f"{indent_str} *Nested fields:*") + lines.append("") + nested_required = field_def.get("required", []) + for nested_name, nested_def in field_def["properties"].items(): + nested_is_required = nested_name in nested_required + lines.append( + generate_field_doc(nested_name, nested_def, nested_is_required, indent + 2) + ) + + return "\n".join(lines) + + +def generate_controller_doc(schema: JSONSchemaWrapper) -> str: + """ + Generate RST documentation for a controller schema. 
+ + Args: + schema: The JSON schema wrapper + + Returns: + RST formatted string documenting the controller + """ + lines = [] + + # Section header + class_name = schema.controller_name + lines.append(class_name) + lines.append("^" * len(class_name)) + lines.append("") + + # Description + if schema.description: + lines.append(schema.description) + lines.append("") + + # Configuration type field + type_field_name = { + "resource": "resource_type", + "storage": "storage_type", + "authentication": "user_authentication_type", + "job_type": "job_type", + }.get(schema.category) + + if type_field_name: + lines.append(f'**Configuration value:** ``"{class_name}"``') + lines.append("") + lines.append(f'Set ``{type_field_name}`` to ``"{class_name}"`` to use this controller.') + lines.append("") + + # Required fields + required_fields = schema.get_required_fields() + if required_fields: + lines.append("Required Configuration Fields") + lines.append("~" * 30) + lines.append("") + for field_name in required_fields: + field_def = schema.get_field(field_name) + if field_def: + lines.append(generate_field_doc(field_name, field_def, True)) + lines.append("") + + # Optional fields + optional_fields = schema.get_optional_fields() + if optional_fields: + lines.append("Optional Configuration Fields") + lines.append("~" * 30) + lines.append("") + for field_name in optional_fields: + field_def = schema.get_field(field_name) + if field_def: + lines.append(generate_field_doc(field_name, field_def, False)) + lines.append("") + + # Example configuration + if schema.example: + lines.append("Example Configuration") + lines.append("~" * 30) + lines.append("") + lines.append(".. 
code-block:: json") + lines.append("") + + example_json = json.dumps(schema.example, indent=2) + for line in example_json.split("\n"): + if line: + lines.append(f" {line}") + lines.append("") + + return "\n".join(lines) + + +def generate_category_doc(category: str, schemas: Dict[str, JSONSchemaWrapper]) -> str: + """ + Generate RST documentation for a category of controllers. + + Args: + category: Category name (resources, storage, authentication, job_types) + schemas: Dictionary of JSON schemas in this category + + Returns: + RST formatted string documenting all controllers in the category + """ + lines = [] + + # Category header + category_titles = { + "resources": "Resource Controllers", + "storage": "Storage Controllers", + "authentication": "Authentication Controllers", + "job_types": "Job Type Controllers", + } + title = category_titles.get(category, category.title()) + lines.append(title) + lines.append("=" * len(title)) + lines.append("") + + # Category description + category_descriptions = { + "resources": "Resource controllers manage job execution on different compute platforms.", + "storage": "Storage controllers manage workspace file storage backends.", + "authentication": "Authentication controllers handle user authentication and authorization.", + "job_types": "Job type controllers define different types of computational jobs.", + } + if category in category_descriptions: + lines.append(category_descriptions[category]) + lines.append("") + + # Generate docs for each controller + for controller_name in sorted(schemas.keys()): + schema = schemas[controller_name] + lines.append(generate_controller_doc(schema)) + lines.append("") + + return "\n".join(lines) + + +def generate_config_reference(output_dir: str): + """ + Generate complete configuration reference documentation. 
+ + Args: + output_dir: Directory to write documentation files + """ + logger.info("Generating configuration reference documentation from JSON schemas...") + + # Get schema loader + schema_loader = get_schema_loader() + all_schemas = schema_loader.get_all_schemas() + + # Generate overview/index + index_lines = [] + index_lines.append("Configuration Reference") + index_lines.append("=" * 25) + index_lines.append("") + index_lines.append("This documentation is automatically generated from JSON Schema files.") + index_lines.append("All validation rules described here are enforced at startup time.") + index_lines.append("") + index_lines.append( + "Schema files are located in ``src/user_workspaces_server/config_schemas/schemas/``" + ) + index_lines.append("") + index_lines.append(".. toctree::") + index_lines.append(" :maxdepth: 2") + index_lines.append(" :caption: Configuration:") + index_lines.append("") + index_lines.append(" config_resources") + index_lines.append(" config_storage") + index_lines.append(" config_authentication") + index_lines.append(" config_job_types") + index_lines.append("") + + # Write index + with open(os.path.join(output_dir, "config_reference.rst"), "w") as f: + f.write("\n".join(index_lines)) + + # Generate category docs + for category, schemas in all_schemas.items(): + if schemas: # Only generate if there are schemas + filename = f"config_{category}.rst" + with open(os.path.join(output_dir, filename), "w") as f: + f.write(generate_category_doc(category, schemas)) + + logger.info(f"Configuration reference documentation written to {output_dir}") + + +def builder_inited(app: Sphinx): + """ + Sphinx event handler called when builder is initialized. + + Generates configuration documentation before Sphinx processes it. 
+ """ + output_dir = app.srcdir + try: + generate_config_reference(output_dir) + except Exception as e: + logger.error(f"Failed to generate configuration documentation: {e}") + raise + + +def setup(app: Sphinx) -> Dict[str, Any]: + """ + Sphinx extension setup function. + + Args: + app: Sphinx application instance + + Returns: + Extension metadata + """ + app.connect("builder-inited", builder_inited) + + return { + "version": "1.0", + "parallel_read_safe": True, + "parallel_write_safe": True, + }