diff --git a/src/forge/cli/config.py b/src/forge/cli/config.py
index a5e35cefd..2dd171ae3 100644
--- a/src/forge/cli/config.py
+++ b/src/forge/cli/config.py
@@ -56,22 +56,22 @@ def _merge_yaml_and_cli_args(yaml_args: Namespace, cli_args: list[str]) -> DictC
     cli args, respectively) and merges them into a single OmegaConf DictConfig.
     If a cli arg overrides a yaml arg with a _component_ field, the cli arg can
-    be specified with the parent field directly, e.g., model=torchtune.models.lora_llama2_7b
-    instead of model._component_=torchtune.models.lora_llama2_7b. Nested fields within the
+    be specified with the parent field directly, e.g., model=my_module.models.my_model
+    instead of model._component_=my_module.models.my_model. Nested fields within the
     component should be specified with dot notation, e.g., model.lora_rank=16.
 
     Example:
         >>> config.yaml:
         >>>     a: 1
         >>>     b:
-        >>>       _component_: torchtune.models.my_model
+        >>>       _component_: my_module.models.my_model
         >>>       c: 3
 
-        >>> tune full_finetune --config config.yaml b=torchtune.models.other_model b.c=4
+        >>> python main.py --config config.yaml b=my_module.models.other_model b.c=4
         >>> yaml_args, cli_args = parser.parse_known_args()
         >>> conf = _merge_yaml_and_cli_args(yaml_args, cli_args)
         >>> print(conf)
-        >>> {"a": 1, "b": {"_component_": "torchtune.models.other_model", "c": 4}}
+        >>> {"a": 1, "b": {"_component_": "my_module.models.other_model", "c": 4}}
 
     Args:
         yaml_args (Namespace): Namespace containing args from yaml file, components
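The override semantics described in the updated docstring can be reproduced with plain OmegaConf. The sketch below is illustrative only, not forge's actual `_merge_yaml_and_cli_args`; the helper name `merge_dotlist_overrides` is invented for the example.

```python
# Illustrative sketch of the documented behavior: a CLI override such as
# `b=my_module.models.other_model` that targets a YAML node carrying a
# `_component_` field is rewritten to `b._component_=...` before merging.
from omegaconf import DictConfig, OmegaConf


def merge_dotlist_overrides(yaml_cfg: DictConfig, overrides: list[str]) -> DictConfig:
    remapped = []
    for arg in overrides:
        key, value = arg.split("=", maxsplit=1)
        node = OmegaConf.select(yaml_cfg, key)
        if isinstance(node, DictConfig) and "_component_" in node:
            key = f"{key}._component_"
        remapped.append(f"{key}={value}")
    return OmegaConf.merge(yaml_cfg, OmegaConf.from_dotlist(remapped))


cfg = OmegaConf.create({"a": 1, "b": {"_component_": "my_module.models.my_model", "c": 3}})
merged = merge_dotlist_overrides(cfg, ["b=my_module.models.other_model", "b.c=4"])
print(OmegaConf.to_container(merged))
# {'a': 1, 'b': {'_component_': 'my_module.models.other_model', 'c': 4}}
```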
diff --git a/src/forge/data/datasets/dataset.py b/src/forge/data/datasets/dataset.py
index 57a624c67..f18d9e07e 100644
--- a/src/forge/data/datasets/dataset.py
+++ b/src/forge/data/datasets/dataset.py
@@ -61,7 +61,7 @@ class DatasetInfo:
 
 
 class TuneIterableDataset(IterableDataset, ABC):
-    """Base class for all torchtune iterable datasets.
+    """Base class for all forge iterable datasets.
 
     Datasets are composable, enabling complex structures such as:
     ``PackedDataset(InterleavedDataset([InterleavedDataset([ds1, ds2]), ds3]))``
diff --git a/src/forge/data/datasets/sft_dataset.py b/src/forge/data/datasets/sft_dataset.py
index fca97f912..b31d16fad 100644
--- a/src/forge/data/datasets/sft_dataset.py
+++ b/src/forge/data/datasets/sft_dataset.py
@@ -22,8 +22,7 @@ class AlpacaToMessages(Transform):
     (or equivalent fields specified in column_map) columns. User messages are formed from the
     instruction + input columns and assistant messages are formed from the output column. Prompt
     templating is conditional on the presence of the "input" column, and thus is handled directly
-    in this transform class instead of a dedicated :class:`~torchtune.data.PromptTemplate` class
-    due to this custom logic.
+    in this transform class.
 
     Args:
         column_map (dict[str, str] | None): a mapping to change the expected "instruction", "input",
diff --git a/src/forge/data/tokenizer.py b/src/forge/data/tokenizer.py
index d93c5d4a4..65407e131 100644
--- a/src/forge/data/tokenizer.py
+++ b/src/forge/data/tokenizer.py
@@ -20,7 +20,7 @@ class HuggingFaceBaseTokenizer(BaseTokenizer):
     """
     A wrapper around Hugging Face tokenizers. See https://github.com/huggingface/tokenizers
 
-    This can be used to load from a Hugging Face tokenizer.json file into a torchtune BaseTokenizer.
+    This can be used to load from a Hugging Face tokenizer.json file into a forge BaseTokenizer.
 
     This class will load the tokenizer.json file from tokenizer_json_path.
     It will attempt to infer BOS and EOS token IDs from config.json if possible, and if not
@@ -210,7 +210,7 @@ class HuggingFaceModelTokenizer(ModelTokenizer):
     Then, it will load all special tokens and chat template from tokenizer config file.
 
     It can be used to tokenize messages with correct chat template, and it eliminates the requirement of
-    the specific ModelTokenizer and custom PromptTemplate.
+    the specific ModelTokenizer.
 
     Args:
         tokenizer_json_path (str): Path to tokenizer.json file
diff --git a/src/forge/data/utils.py b/src/forge/data/utils.py
index b2fdaec0c..be8c13857 100644
--- a/src/forge/data/utils.py
+++ b/src/forge/data/utils.py
@@ -32,7 +32,7 @@ class TuneMessage:
     """
     This class represents individual messages in a fine-tuning dataset. It supports
     text-only content, text with interleaved images, and tool calls. The
-    :class:`~torchtune.modules.transforms.tokenizers.ModelTokenizer` will tokenize
+    :class:`~forge.interfaces.ModelTokenizer` will tokenize
     the content of the message using ``tokenize_messages`` and attach the appropriate
     special tokens based on the flags set in this class.
 
@@ -61,8 +61,7 @@ class TuneMessage:
         - All ipython messages (tool call returns) should set ``eot=False``.
 
     Note:
-        TuneMessage class expects any image content to be a ``torch.Tensor``, as output
-        by e.g. :func:`~torchtune.data.load_image`
+        TuneMessage class expects any image content to be a ``torch.Tensor``.
     """
 
     def __init__(
diff --git a/src/forge/interfaces.py b/src/forge/interfaces.py
index df79c302e..a07bb592a 100644
--- a/src/forge/interfaces.py
+++ b/src/forge/interfaces.py
@@ -97,8 +97,7 @@ async def update_weights(self, policy_version: int):
 class BaseTokenizer(ABC):
     """
     Abstract token encoding model that implements ``encode`` and ``decode`` methods.
-    See :class:`~torchtune.modules.transforms.tokenizers.SentencePieceBaseTokenizer` and
-    :class:`~torchtune.modules.transforms.tokenizers.TikTokenBaseTokenizer` for example implementations of this protocol.
+    See :class:`forge.data.HuggingFaceModelTokenizer` for an example implementation of this protocol.
     """
 
     @abstractmethod
@@ -133,7 +132,7 @@ def decode(self, token_ids: list[int], **kwargs: dict[str, Any]) -> str:
 class ModelTokenizer(ABC):
     """
     Abstract tokenizer that implements model-specific special token logic in
-    the ``tokenize_messages`` method. See :class:`~torchtune.models.llama3.Llama3Tokenizer`
+    the ``tokenize_messages`` method. See :class:`forge.data.HuggingFaceModelTokenizer`
     for an example implementation of this protocol.
     """
diff --git a/src/forge/util/logging.py b/src/forge/util/logging.py
index e47f5dfa3..9eacf893d 100644
--- a/src/forge/util/logging.py
+++ b/src/forge/util/logging.py
@@ -20,14 +20,17 @@ def get_logger(level: str | None = None) -> logging.Logger:
     Example:
         >>> logger = get_logger("INFO")
        >>> logger.info("Hello world!")
-        INFO:torchtune.utils._logging:Hello world!
+        INFO:forge.util.logging: Hello world!
 
     Returns:
         logging.Logger: The logger.
     """
     logger = logging.getLogger(__name__)
     if not logger.hasHandlers():
-        logger.addHandler(logging.StreamHandler())
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s")
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
     if level is not None:
         level = getattr(logging, level.upper())
     logger.setLevel(level)
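For reference, the output format produced by the new handler wiring can be checked with the standard library alone. This is a standalone sketch; the logger name is hard-coded purely for illustration, whereas get_logger itself uses `__name__`.

```python
import logging

# Mirror the handler/formatter setup added to get_logger above.
logger = logging.getLogger("forge.util.logging")
if not logger.hasHandlers():
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(levelname)s:%(name)s: %(message)s"))
    logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info("Hello world!")  # prints: INFO:forge.util.logging: Hello world!
```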
""" logger = logging.getLogger(__name__) if not logger.hasHandlers(): - logger.addHandler(logging.StreamHandler()) + handler = logging.StreamHandler() + formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s") + handler.setFormatter(formatter) + logger.addHandler(handler) if level is not None: level = getattr(logging, level.upper()) logger.setLevel(level) diff --git a/src/forge/util/metric_logging.py b/src/forge/util/metric_logging.py index ba8992d20..1fe180a35 100644 --- a/src/forge/util/metric_logging.py +++ b/src/forge/util/metric_logging.py @@ -178,7 +178,7 @@ class WandBLogger(MetricLogger): If int, all metrics will be logged at this frequency. If Mapping, calls to `log` and `log_dict` will be ignored if `step % freq[metric_name] != 0` log_dir (str | None): WandB log directory. - project (str): WandB project name. Default is `torchtune`. + project (str): WandB project name. Default is `torchforge`. entity (str | None): WandB entity name. If you don't specify an entity, the run will be sent to your default entity, which is usually your username. group (str | None): WandB group name for grouping runs together. If you don't @@ -205,7 +205,7 @@ class WandBLogger(MetricLogger): def __init__( self, freq: Union[int, Mapping[str, int]], - project: str, + project: str = "torchforge", log_dir: str = "metrics_log", entity: str | None = None, group: str | None = None,