diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 78fd09634..9b16e58b1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -39,7 +39,7 @@ jobs: run: python -m pip install torch==2.9.0 --index-url https://download.pytorch.org/whl/test/cu130 - name: Install monarch shell: bash -l {0} - run: python -m pip install monarch-no-torch==0.1.0.dev20250826 --find-links assets/ci + run: pip install torchmonarch - name: Install torchforge shell: bash -l {0} env: @@ -52,9 +52,35 @@ jobs: shell: bash -l {0} working-directory: docs run: | - set +e # Don't exit on error - make html SPHINXOPTS="-WT --keep-going" || echo "Build completed with warnings/errors" - set -e # Re-enable exit on error for subsequent commands + # Set up library paths to ensure all dependencies are available + # This is critical for monarch and other native dependencies that need libpython3.10.so.1.0 + export LD_LIBRARY_PATH="${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH:-}" + + # Also set CUDA paths if needed + if [ -d "/usr/local/cuda-12.9" ]; then + export LD_LIBRARY_PATH="/usr/local/cuda-12.9/compat:${LD_LIBRARY_PATH}" + export CUDA_HOME=/usr/local/cuda-12.9 + fi + + # Verify dependencies can be imported before building docs + echo "Verifying dependencies..." + python -c "import forge; print('✓ forge imported successfully')" + python -c "import monarch; print('✓ monarch imported successfully')" + + # Build docs with --keep-going only; -WT is not passed, so warnings alone do not fail the build + # Capture exit code but continue to see all errors + set +e + make html SPHINXOPTS="--keep-going" + BUILD_EXIT_CODE=$? 
+ set -e + + # Report results + if [ $BUILD_EXIT_CODE -ne 0 ]; then + echo "❌ Documentation build failed with warnings or errors (exit code: $BUILD_EXIT_CODE)" + exit $BUILD_EXIT_CODE + else + echo "✅ Documentation build completed successfully with no warnings or errors" + fi - name: Upload docs artifact uses: actions/upload-artifact@v4 with: diff --git a/.gitignore b/.gitignore index 413066489..c952405d6 100644 --- a/.gitignore +++ b/.gitignore @@ -153,7 +153,7 @@ docs/source/generated_examples/ docs/source/gen_modules/ docs/source/generated/ docs/source/sg_execution_times.rst -docs/source/tutorials +docs/source/tutorials/* # pytorch-sphinx-theme gets installed here docs/src diff --git a/docs/requirements.txt b/docs/requirements.txt index 8846bc62e..525ca1e86 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,3 +6,4 @@ sphinxcontrib-mermaid==1.0.0 sphinx-gallery==0.19.0 myst-parser #==0.18.1 # if want to contribute in markdown sphinx-sitemap==2.7.1 +sphinx-autodoc-typehints==1.25.3 diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css new file mode 100644 index 000000000..89854cc8b --- /dev/null +++ b/docs/source/_static/custom.css @@ -0,0 +1,98 @@ +/* Custom CSS for collapsible parameter lists */ + +/* Hide parameters in signatures */ +.sig-param-hidden { + display: none !important; +} + +/* Inline toggle button for signatures */ +.params-toggle-btn-inline { + display: inline; + padding: 0.2rem 0.5rem; + margin: 0 0.25rem; + background-color: var(--pst-color-background); + border: 1px solid var(--pst-color-border); + border-radius: 3px; + cursor: pointer; + font-size: 0.85em; + font-family: var(--pst-font-family-base); + color: var(--pst-color-primary); + transition: all 0.2s ease; + vertical-align: middle; +} + +.params-toggle-btn-inline:hover { + background-color: var(--pst-color-background); + border-color: var(--pst-color-border); +} + +.params-toggle-btn-inline:focus { + outline: none; +} + +.toggle-icon { + display: 
inline-block; + font-size: 0.8em; + transition: transform 0.2s ease; +} + +/* Wrapper for the button */ +.sig-params-wrapper { + display: inline; +} + +/* Old styles for field-list collapsing (kept for backward compatibility) */ +.collapsible-params { + margin: 1rem 0; +} + +.params-toggle-btn { + display: inline-block; + padding: 0.5rem 1rem; + margin-bottom: 0.5rem; + background-color: var(--pst-color-background); + border: 1px solid var(--pst-color-border); + border-radius: 4px; + cursor: pointer; + font-size: 0.9rem; + color: var(--pst-color-primary); + transition: all 0.3s ease; +} + +.params-toggle-btn:hover { + background-color: var(--pst-color-background); + border-color: var(--pst-color-border); +} + +.params-content { + max-height: 10000px; + overflow: hidden; + transition: max-height 0.5s ease, opacity 0.3s ease; + opacity: 1; +} + +.params-content.collapsed { + max-height: 0; + opacity: 0; +} + +/* Ensure the collapsed parameters look good */ +.params-content dl.field-list { + margin-top: 0; +} + +.params-content > dt { + margin-top: 0.5rem; +} + +.params-content > dt:first-child { + margin-top: 0; +} + +/* Responsive adjustments */ +@media (max-width: 768px) { + .params-toggle-btn { + width: 100%; + text-align: left; + } +} diff --git a/docs/source/_static/custom.js b/docs/source/_static/custom.js new file mode 100644 index 000000000..415592d30 --- /dev/null +++ b/docs/source/_static/custom.js @@ -0,0 +1,93 @@ +// Custom JavaScript to make long parameter lists in class signatures collapsible +document.addEventListener('DOMContentLoaded', function() { + console.log('Collapsible parameters script loaded'); + + // Find all class/function signatures + const signatures = document.querySelectorAll('dl.py.class > dt, dl.py.function > dt, dl.py.method > dt'); + + signatures.forEach(function(signature) { + // Find all parameter elements in the signature + const params = signature.querySelectorAll('em.sig-param, .sig-param'); + + console.log(`Found signature 
with ${params.length} parameters`); + + // Only make it collapsible if there are more than 10 parameters + if (params.length > 10) { + console.log('Creating collapsible structure for signature with', params.length, 'parameters'); + + const visibleCount = 5; + const hiddenCount = params.length - visibleCount; + + // Create an inline wrapper span for the toggle button + const wrapper = document.createElement('span'); + wrapper.className = 'sig-params-wrapper'; + wrapper.style.display = 'inline'; + + // Create toggle button + const toggleBtn = document.createElement('button'); + toggleBtn.className = 'params-toggle-btn-inline'; + toggleBtn.innerHTML = ` Show More`; + toggleBtn.setAttribute('aria-expanded', 'false'); + toggleBtn.title = `Show ${hiddenCount} more parameters`; + + // Collect all nodes to hide (params and text nodes between them) + const nodesToHide = []; + + // Hide parameters after the first visibleCount (5) + let insertedButton = false; + params.forEach(function(param, index) { + if (index >= visibleCount) { + // Add 'hidden' class to hide the parameter + param.classList.add('sig-param-hidden'); + nodesToHide.push(param); + + // Also hide the text node (comma/space) that follows this parameter + let nextNode = param.nextSibling; + while (nextNode && nextNode.nodeType === Node.TEXT_NODE) { + const textSpan = document.createElement('span'); + textSpan.className = 'sig-param-hidden'; + textSpan.textContent = nextNode.textContent; + nextNode.parentNode.replaceChild(textSpan, nextNode); + nodesToHide.push(textSpan); + break; + } + + // Insert the toggle button before the first hidden parameter + if (!insertedButton) { + param.parentNode.insertBefore(wrapper, param); + wrapper.appendChild(toggleBtn); + insertedButton = true; + } + } + }); + + // Add click handler to toggle + toggleBtn.addEventListener('click', function(e) { + e.preventDefault(); + e.stopPropagation(); + + const isExpanded = toggleBtn.getAttribute('aria-expanded') === 'true'; + + if (isExpanded) { + // Collapse: 
hide parameters again + nodesToHide.forEach(function(node) { + node.classList.add('sig-param-hidden'); + }); + toggleBtn.setAttribute('aria-expanded', 'false'); + toggleBtn.innerHTML = ` Show More`; + toggleBtn.title = `Show ${hiddenCount} more parameters`; + } else { + // Expand: show all parameters + nodesToHide.forEach(function(node) { + node.classList.remove('sig-param-hidden'); + }); + toggleBtn.setAttribute('aria-expanded', 'true'); + toggleBtn.innerHTML = ` Hide`; + toggleBtn.title = `Hide ${hiddenCount} parameters`; + } + }); + + console.log('Collapsible structure created successfully'); + } + }); +}); diff --git a/docs/source/api.md b/docs/source/api.md index 5ed009c4c..1235f9d4e 100644 --- a/docs/source/api.md +++ b/docs/source/api.md @@ -1,35 +1,35 @@ # API Reference -This section provides comprehensive API documentation for TorchForge modules and classes. +This section provides comprehensive API documentation for TorchForge. -TorchForge is organized into several key modules, each providing specialized functionality for post-training generative AI models: +## Overview -## Module Overview +TorchForge is a PyTorch native platform for post-training generative AI models, +designed to streamline reinforcement learning workflows for large language +models. The platform leverages PyTorch's distributed computing capabilities +and is built on top of [Monarch](https://meta-pytorch.org/monarch/), +making extensive use of actors for distributed computation and fault tolerance. 
-**Core Components** -- [Interfaces & Types](api_core.md) - Core interfaces and type definitions -- [Actors](api_actors.md) - Model training and inference components -- [Controller](api_controller.md) - Distributed training orchestration and resource management +Key Features of TorchForge include: -**Data Management** -- [Data](api_data.md) - Data handling utilities, datasets, and data models +- **Actor-Based Architecture**: TorchForge uses an actor-based system for distributed training, providing excellent scalability and fault tolerance. +- **PyTorch Native**: Built natively on PyTorch, ensuring seamless integration with existing PyTorch workflows. +- **Post-Training Focus**: Specifically designed for post-training techniques like RLHF, SFT, and other alignment methods. +- **Distributed by Design**: Supports multi-GPU and multi-node training out of the box. -**Training Components** -- [Losses](api_losses.md) - Loss functions for reinforcement learning and supervised fine-tuning -- [Environments](api_envs.md) - Training and inference environments -**Tools & Utilities** -- [Utilities](api_util.md) - General utility functions and helpers +For most use cases, you'll interact with the high-level service +interfaces, which handle the complexity of actor coordination and +distributed training automatically. -```{toctree} -:maxdepth: 2 -:hidden: +For advanced users who need fine-grained control, the individual actor +APIs provide direct access to the underlying distributed components. -api_core +```{toctree} +:maxdepth: 1 api_actors -api_data -api_losses -api_envs -api_controller -api_util +api_service +api_generator +api_model +api_trainer ``` diff --git a/docs/source/api_actors.md b/docs/source/api_actors.md index 6ef5f1ff8..73eae1220 100644 --- a/docs/source/api_actors.md +++ b/docs/source/api_actors.md @@ -1,19 +1,20 @@ -# Actors - -The actors module contains the core components for model training and inference in TorchForge. 
This includes policy actors, reference models, replay buffers, and trainers. - -## Policy Actor - -The policy actor is responsible for model inference and policy interactions during training. - -## Reference Model - -The reference model provides baseline comparisons for reinforcement learning algorithms. - -## Replay Buffer - -The replay buffer manages storage and sampling of training experiences. - -## Trainer - -The trainer orchestrates the training process and implements training algorithms. +# ForgeActor + +```{eval-rst} +.. currentmodule:: forge.actors +``` + +The actors module contains the core components for model training +and inference in TorchForge. These pre-built actors provide essential +functionality for reinforcement learning workflows and can be used +as building blocks for complex distributed training systems. + +```{eval-rst} +.. currentmodule:: forge.controller.actor + +.. autoclass:: ForgeActor + :members: + :undoc-members: + :show-inheritance: + :exclude-members: logger, setup, set_env, __init__ +``` diff --git a/docs/source/api_controller.md b/docs/source/api_controller.md deleted file mode 100644 index e9bedda74..000000000 --- a/docs/source/api_controller.md +++ /dev/null @@ -1,3 +0,0 @@ -# Controller - -Distributed training orchestration and resource management components for TorchForge. diff --git a/docs/source/api_core.md b/docs/source/api_core.md deleted file mode 100644 index 75b3e9ae5..000000000 --- a/docs/source/api_core.md +++ /dev/null @@ -1,3 +0,0 @@ -# Core Interfaces - -This section covers the fundamental interfaces and type definitions that form the foundation of TorchForge. diff --git a/docs/source/api_data.md b/docs/source/api_data.md deleted file mode 100644 index cbc1cfc53..000000000 --- a/docs/source/api_data.md +++ /dev/null @@ -1,16 +0,0 @@ -# Data Management - -Comprehensive data handling utilities for training and -inference, including datasets, data models, and various -data processing utilities. 
- -## Prompt - -Data model for input prompts and contexts. - -```{eval-rst} -.. automodule:: forge.data_models.prompt - :members: - :undoc-members: - :show-inheritance: -``` diff --git a/docs/source/api_envs.md b/docs/source/api_envs.md deleted file mode 100644 index 88e9d1cea..000000000 --- a/docs/source/api_envs.md +++ /dev/null @@ -1,8 +0,0 @@ -# Environments - -Training and inference environments for TorchForge models. - - -## Chat Environment - -Chat-based environment for conversational AI training and inference. diff --git a/docs/source/api_generator.md b/docs/source/api_generator.md new file mode 100644 index 000000000..a0bb67f3d --- /dev/null +++ b/docs/source/api_generator.md @@ -0,0 +1,46 @@ +# Generator + +```{eval-rst} +.. currentmodule:: forge.actors.policy +``` + +The Generator (Policy) is the core inference engine in TorchForge, +built on top of [vLLM](https://docs.vllm.ai/en/latest/). +It manages model serving, text generation, and weight updates for reinforcement learning workflows. + +## Policy + +```{eval-rst} +.. autoclass:: Policy + :members: launch, generate, update_weights, get_version, stop + :exclude-members: __init__ + :no-inherited-members: +``` + +## Configuration + +### EngineConfig + +```{eval-rst} +.. autoclass:: EngineConfig + :members: + :undoc-members: + :no-inherited-members: +``` + +### SamplingConfig + +```{eval-rst} +.. autoclass:: SamplingConfig + :members: + :undoc-members: +``` + +## PolicyWorker + +```{eval-rst} +.. autoclass:: PolicyWorker + :members: execute_model, update, setup_kv_cache + :show-inheritance: + :exclude-members: __init__ +``` diff --git a/docs/source/api_losses.md b/docs/source/api_losses.md deleted file mode 100644 index 097b83394..000000000 --- a/docs/source/api_losses.md +++ /dev/null @@ -1,11 +0,0 @@ -# Losses - -Loss functions for reinforcement learning and supervised fine-tuning in TorchForge. 
- -## GRPO Loss - -Generalized Reward Policy Optimization (GRPO) loss implementation for reinforcement learning. - -## Reinforce Loss - -Reinforce algorithm loss implementation for policy gradient methods. diff --git a/docs/source/api_model.md b/docs/source/api_model.md new file mode 100644 index 000000000..94e51478e --- /dev/null +++ b/docs/source/api_model.md @@ -0,0 +1,29 @@ +# Model + +```{eval-rst} +.. currentmodule:: forge.actors.reference_model +``` + +The {class}`forge.actors.reference_model.ReferenceModel` provides a frozen +copy of the policy model used for computing advantages in reinforcement +learning. It performs inference on input sequences and returns logits or +log probabilities for computing KL divergence and other RL metrics. + +## ReferenceModel + +```{eval-rst} +.. autoclass:: forge.actors.reference_model.ReferenceModel + :members: + :undoc-members: + :show-inheritance: +``` + +The ReferenceModel uses a subset of TorchTitan's configuration system: + +- **model**: Model architecture settings (Model dataclass) +- **parallelism**: Parallelism configuration for distributed inference (Parallelism dataclass) +- **checkpoint**: Checkpoint loading settings (Checkpoint dataclass) +- **compile**: Model compilation settings (Compile dataclass) +- **training**: Training configuration for dtype and other settings (Training dataclass) + +For detailed configuration options, refer to the [TorchTitan documentation](https://github.com/pytorch/torchtitan). diff --git a/docs/source/api_service.md b/docs/source/api_service.md new file mode 100644 index 000000000..df2bf3dc8 --- /dev/null +++ b/docs/source/api_service.md @@ -0,0 +1,12 @@ +# Service + +```{eval-rst} +.. currentmodule:: forge.controller.service.service +``` + +```{eval-rst} +.. 
autoclass:: Service + + :members: call_all, start_session, get_metrics, get_metrics_summary, terminate_session, stop + :show-inheritance: +``` diff --git a/docs/source/api_trainer.md b/docs/source/api_trainer.md new file mode 100644 index 000000000..75aba94f0 --- /dev/null +++ b/docs/source/api_trainer.md @@ -0,0 +1,68 @@ +# Trainer + +```{eval-rst} +.. currentmodule:: forge.actors.trainer +``` + +The Trainer manages model training in TorchForge, built on top of TorchTitan. +It handles forward/backward passes, weight updates, and checkpoint management for reinforcement learning workflows. + +## RLTrainer + +```{eval-rst} +.. autoclass:: RLTrainer + :members: train_step, push_weights, cleanup + :exclude-members: __init__ +``` + +## Configuration + +The RLTrainer uses TorchTitan's configuration system with the following components: + +### Job Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Job + :members: + :undoc-members: +``` + +### Model Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Model + :members: + :undoc-members: +``` + +### Optimizer Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Optimizer + :members: + :undoc-members: +``` + +### Training Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Training + :members: + :undoc-members: +``` + +### Parallelism Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Parallelism + :members: + :undoc-members: +``` + +### Checkpoint Configuration + +```{eval-rst} +.. autoclass:: torchtitan.config.job_config.Checkpoint + :members: + :undoc-members: +``` diff --git a/docs/source/api_util.md b/docs/source/api_util.md deleted file mode 100644 index f15e03b76..000000000 --- a/docs/source/api_util.md +++ /dev/null @@ -1,25 +0,0 @@ -# Utilities - -General utility functions and helpers used throughout TorchForge. - -## Distributed Computing - -Utilities for distributed training and communication. 
- -```{eval-rst} -.. automodule:: forge.util.distributed - :members: - :undoc-members: - :show-inheritance: -``` - -## Logging - -Logging configuration and utilities. - -```{eval-rst} -.. automodule:: forge.util.logging - :members: - :undoc-members: - :show-inheritance: -``` diff --git a/docs/source/conf.py b/docs/source/conf.py index 760a8d714..ee9d62148 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,6 +58,7 @@ def get_version_path(): "myst_parser", "sphinx.ext.autodoc", "sphinx.ext.autosummary", + "sphinx_autodoc_typehints", "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.viewcode", @@ -74,11 +75,18 @@ def get_version_path(): ] sitemap_url_scheme = "{link}" +# Ensure static files use relative paths +html_static_path = ["_static"] + templates_path = [ "_templates", os.path.join(os.path.dirname(pytorch_sphinx_theme2.__file__), "templates"), ] -exclude_patterns = ["tutorials/index.rst"] +exclude_patterns = ["tutorials/index.rst", "tutorials/template_tutorial.rst"] + +html_static_path = ["_static"] +html_css_files = ["custom.css"] +html_js_files = ["custom.js"] sys.path.insert(0, os.path.abspath(".")) sys.path.insert(0, os.path.abspath("../../src")) @@ -124,6 +132,8 @@ def get_version_path(): "navbar_center": "navbar-nav", "canonical_url": "https://meta-pytorch.org/forge/", "header_links_before_dropdown": 7, + "show_nav_level": 2, + "show_toc_level": 2, } theme_variables = pytorch_sphinx_theme2.get_theme_variables() @@ -160,11 +170,42 @@ def get_version_path(): autodoc_default_options = { "members": True, "member-order": "bysource", - "special-members": "__init__", - "undoc-members": True, "exclude-members": "__weakref__", + "private-members": False, } +# Autodoc configuration for cleaner signatures +autodoc_preserve_defaults = True # Preserves default values without expansion +autodoc_typehints = "description" # Move type hints to description instead of signature +autodoc_typehints_description_target = ( + "documented" # Only add 
types to documented params +) + +# Suppress warnings from third-party library docstrings +suppress_warnings = [ + "docutils", # Suppress docstring formatting issues from third-party libraries + "app.add_node", # Suppress node warnings + "app.add_directive", # Suppress directive warnings + "ref.class", # Suppress missing reference warnings + "ref.func", # Suppress missing function reference warnings + "ref.meth", # Suppress missing method reference warnings +] + +# Keep warnings as "system message" paragraphs in the rendered pages (does not affect whether the build fails) +keep_warnings = True + +# Leave nitpicky mode off so unresolved references are not all reported as warnings - important for handling third-party library docstrings +# This is especially important when dependencies (like torchtitan) have RST formatting +# that may not be perfect but works with Napoleon extension +nitpicky = False # Don't be overly strict about references + +# Napoleon settings for Google-style docstrings (from torchtitan and other dependencies) +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_use_ivar = True + # -- Sphinx Gallery configuration ------------------------------------------- sphinx_gallery_conf = { @@ -176,6 +217,6 @@ def get_version_path(): "plot_gallery": "True", "promote_jupyter_magic": True, "backreferences_dir": None, - "write_computation_times": True, "show_signature": False, + "write_computation_times": False, } diff --git a/src/forge/actors/reference_model.py b/src/forge/actors/reference_model.py index cc57e5246..bfe9f9494 100644 --- a/src/forge/actors/reference_model.py +++ b/src/forge/actors/reference_model.py @@ -37,6 +37,10 @@ @dataclass class ReferenceModel(ForgeActor): + """ + Reference model implementation for the TorchForge service. 
+ """ + # Refer to titan JobConfig for enabling more ForgeEngine configuration model: Model = field(default_factory=Model) parallelism: Parallelism = field(default_factory=Parallelism) diff --git a/src/forge/actors/trainer.py b/src/forge/actors/trainer.py index 4ffc63001..486286680 100644 --- a/src/forge/actors/trainer.py +++ b/src/forge/actors/trainer.py @@ -95,6 +95,10 @@ def cleanup_old_weight_versions( @dataclass class RLTrainer(ForgeActor): + """ + RL Trainer implementation for the TorchForge service. + """ + job: Job = field(default_factory=Job) model: Model = field(default_factory=Model) optimizer: Optimizer = field(default_factory=Optimizer) diff --git a/src/forge/controller/actor.py b/src/forge/controller/actor.py index a899da6f0..4a5cbf173 100644 --- a/src/forge/controller/actor.py +++ b/src/forge/controller/actor.py @@ -22,11 +22,36 @@ class ForgeActor(Actor): + """ + Base class for Forge actors with configurable resource attributes. + + The initialization sets up logging configuration with rank/size information and + initializes the actor's process mesh reference. The rank and size are automatically + determined from the current execution context. + + Args: + *args: Variable length argument list passed to the parent Actor class. + **kwargs: Arbitrary keyword arguments passed to the parent Actor class. + """ + procs: int = 1 + """Number of processes to use for this actor. Defaults to 1.""" + hosts: int | None = None + """Number of hosts to distribute the actor across. If None, uses as many + hosts as needed to accommodate the requested processes. Defaults to None.""" + with_gpus: bool = False + """Whether to allocate GPU resources for this actor. Defaults to False.""" + num_replicas: int = 1 + """Number of replicas to create when spawning as a service. + Only applies when using as_service(). Defaults to 1.""" + mesh_name: str | None = None + """Optional name for the process mesh used by this actor. + If None, a default name will be generated. 
Defaults to None.""" + _extra_config: dict[str, Any] = {} def __init__(self, *args, **kwargs): @@ -69,23 +94,35 @@ def options( `.as_actor()` or `.as_service()`. Each call creates a separate subclass, so multiple different configurations can coexist without interfering with each other. - ---- Usage Examples ---- + Examples: + + * Pre-configure a service with multiple replicas: + + .. code-block:: python + + service = await MyForgeActor.options(num_replicas=2, procs=2).as_service(...) + await service.shutdown() + + * Default usage without calling options: + + .. code-block:: python + + service = await MyForgeActor.as_service(...) + await service.shutdown() + + * Pre-configure a single actor: + + .. code-block:: python - # Pre-configure a service with multiple replicas - service = await MyForgeActor.options(num_replicas=2, procs=2).as_service(...) - await service.shutdown() + actor = await MyForgeActor.options(procs=1, hosts=1).as_actor(...) + await actor.shutdown() - # Default usage without calling options - service = await MyForgeActor.as_service(...) - await service.shutdown() + * Default usage without calling options: - # Pre-configure a single actor - actor = await MyForgeActor.options(procs=1, hosts=1).as_actor(...) - await actor.shutdown() + .. code-block:: python - # Default usage without calling options - actor = await MyForgeActor.as_actor(...) - await actor.shutdown() + actor = await MyForgeActor.as_actor(...) 
+ await actor.shutdown() """ attrs = { diff --git a/src/forge/controller/service/service.py b/src/forge/controller/service/service.py index 0b655fb6a..1413cbba1 100644 --- a/src/forge/controller/service/service.py +++ b/src/forge/controller/service/service.py @@ -68,13 +68,6 @@ class Service: actor_def: Actor class definition to instantiate on each replica *actor_args: Positional arguments passed to actor constructor **actor_kwargs: Keyword arguments passed to actor constructor - - - Attributes: - _cfg: Service configuration - _replicas: List of managed replica instances - _active_sessions: Currently active sessions - _metrics: Aggregated service and replica metrics """ def __init__( @@ -486,6 +479,10 @@ async def _get_replica(self, sess_id: str | None) -> "Replica": ) async def stop(self): + """ + Stops the service and all managed replicas. + This method should be called when the service is no longer needed. + """ logger.debug("Stopping service...") # Signal shutdown to health loop self._shutdown_requested = True @@ -605,12 +602,6 @@ class ServiceActor(Actor): actor_def: Actor class definition to instantiate on each replica *actor_args: Positional arguments passed to actor constructor **actor_kwargs: Keyword arguments passed to actor constructor - - Attributes: - _cfg: Service configuration - _replicas: List of managed replica instances - _active_sessions: Currently active sessions - _metrics: Aggregated service and replica metrics """ def __init__(self, cfg: ServiceConfig, actor_def, actor_kwargs: dict):