diff --git a/torchao/float8/float8_linear_utils.py b/torchao/float8/float8_linear_utils.py
index 0d9674e6c3..e0def790b8 100644
--- a/torchao/float8/float8_linear_utils.py
+++ b/torchao/float8/float8_linear_utils.py
@@ -7,6 +7,7 @@
 from functools import partial
 from typing import Callable, List, Optional, Union
 
+import torch
 import torch.nn as nn
 
 from torchao.float8.config import Float8LinearConfig, Float8LinearRecipeName
@@ -101,6 +102,7 @@ def convert_to_float8_training(
     Returns:
         nn.Module: The modified module with swapped linear layers.
     """
+    torch._C._log_api_usage_once("torchao.float8.convert_to_float8_training")
     if config is None:
         config = Float8LinearConfig()
diff --git a/torchao/float8/fsdp_utils.py b/torchao/float8/fsdp_utils.py
index 7fdf8de262..79e62c7e10 100644
--- a/torchao/float8/fsdp_utils.py
+++ b/torchao/float8/fsdp_utils.py
@@ -39,6 +39,10 @@ def precompute_float8_dynamic_scale_for_fsdp(module: nn.Module) -> None:
     from torchao.float8.float8_linear import Float8Linear
 
+    torch._C._log_api_usage_once(
+        "torchao.float8.precompute_float8_dynamic_scale_for_fsdp"
+    )
+
     float8_linears: List[Float8Linear] = [
         m
         for m in module.modules()
diff --git a/torchao/optim/adam.py b/torchao/optim/adam.py
index 05e97ed23a..8beaffb627 100644
--- a/torchao/optim/adam.py
+++ b/torchao/optim/adam.py
@@ -233,6 +233,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=False,
         )
+        torch._C._log_api_usage_once("torchao.optim.Adam8bit")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
@@ -263,6 +264,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=False,
         )
+        torch._C._log_api_usage_once("torchao.optim.Adam4bit")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
@@ -293,6 +295,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=False,
         )
+        torch._C._log_api_usage_once("torchao.optim.AdamFp8")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
@@ -323,6 +326,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=True,
         )
+        torch._C._log_api_usage_once("torchao.optim.AdamW8bit")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
@@ -353,6 +357,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=True,
         )
+        torch._C._log_api_usage_once("torchao.optim.AdamW4bit")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
@@ -383,6 +388,7 @@ def __init__(
             bf16_stochastic_round=bf16_stochastic_round,
             is_adamw=True,
         )
+        torch._C._log_api_usage_once("torchao.optim.AdamWFp8")
 
     @staticmethod
     def _subclass_zeros(p: Tensor, signed: bool, block_size: int):
diff --git a/torchao/quantization/pt2e/convert.py b/torchao/quantization/pt2e/convert.py
index 3728d7c252..f205e55a6d 100644
--- a/torchao/quantization/pt2e/convert.py
+++ b/torchao/quantization/pt2e/convert.py
@@ -1266,9 +1266,6 @@ def _convert_to_reference_decomposed_fx(
         reference_quantized_model = _convert_to_reference_decomposed_fx(prepared_model)
 
     """
-    torch._C._log_api_usage_once(
-        "quantization_api.quantize_fx._convert_to_reference_decomposed_fx"
-    )
     return _convert_fx(
         graph_module,
         is_reference=True,
diff --git a/torchao/quantization/pt2e/quantize_pt2e.py b/torchao/quantization/pt2e/quantize_pt2e.py
index 1975642dfd..88f0eb490c 100644
--- a/torchao/quantization/pt2e/quantize_pt2e.py
+++ b/torchao/quantization/pt2e/quantize_pt2e.py
@@ -106,7 +106,7 @@ def calibrate(model, data_loader):
         return torch_prepare_pt2e(model, quantizer)
 
-    torch._C._log_api_usage_once("quantization_api.quantize_pt2e.prepare_pt2e")
+    torch._C._log_api_usage_once("torchao.quantization.pt2e.prepare_pt2e")
     original_graph_meta = model.meta
     node_name_to_scope = _get_node_name_to_scope(model)
     # TODO: check qconfig_mapping to make sure conv and bn are both configured
@@ -192,7 +192,7 @@ def train_loop(model, train_data):
         return torch_prepare_qat_pt2e(model, quantizer)
 
-    torch._C._log_api_usage_once("quantization_api.quantize_pt2e.prepare_qat_pt2e")
+    torch._C._log_api_usage_once("torchao.quantization.pt2e.prepare_qat_pt2e")
     original_graph_meta = model.meta
     node_name_to_scope = _get_node_name_to_scope(model)
     model = quantizer.transform_for_annotation(model)
@@ -304,7 +304,7 @@ def convert_pt2e(
         return torch_convert_pt2e(model, use_reference_representation, fold_quantize)
 
-    torch._C._log_api_usage_once("quantization_api.quantize_pt2e.convert_pt2e")
+    torch._C._log_api_usage_once("torchao.quantization.pt2e.convert_pt2e")
     if not isinstance(use_reference_representation, bool):
         raise ValueError(
             "Unexpected argument type for `use_reference_representation`, "
diff --git a/torchao/quantization/qat/api.py b/torchao/quantization/qat/api.py
index 5aa46548a2..e7bbba466a 100644
--- a/torchao/quantization/qat/api.py
+++ b/torchao/quantization/qat/api.py
@@ -146,6 +146,7 @@ def __init__(
         self.__post_init__()
 
     def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.qat.QATConfig")
         self.step = self.step.lower()
         all_step_values = [s.value for s in QATStep]
         if self.step not in all_step_values:
@@ -377,6 +378,7 @@ class ComposableQATQuantizer(TwoStepQuantizer):
     """
 
     def __init__(self, quantizers: List[TwoStepQuantizer]):
+        torch._C._log_api_usage_once("torchao.quantization.qat.ComposableQATQuantizer")
         self.quantizers = quantizers
 
     def prepare(
@@ -403,6 +405,8 @@ def initialize_fake_quantizers(
     :class:`~torchao.quantization.qat.fake_quantizer.IntxFakeQuantizerBase`
     in the model based on the provided example inputs.
     """
+    torch._C._log_api_usage_once("torchao.quantization.qat.initialize_fake_quantizers")
+
     # avoid circular dependencies
     from torchao.quantization.qat.fake_quantizer import IntxFakeQuantizer
diff --git a/torchao/quantization/qat/embedding.py b/torchao/quantization/qat/embedding.py
index 28a3f2cee0..a1a6484772 100644
--- a/torchao/quantization/qat/embedding.py
+++ b/torchao/quantization/qat/embedding.py
@@ -65,6 +65,7 @@ def __init__(
             *args,
             **kwargs,
         )
+        torch._C._log_api_usage_once("torchao.quantization.qat.FakeQuantizedEmbedding")
         if weight_config is not None:
             self.weight_fake_quantizer = FakeQuantizerBase.from_config(weight_config)
         else:
@@ -148,6 +149,9 @@ def __init__(
         zero_point_precision: torch.dtype = torch.int32,
     ) -> None:
         super().__init__()
+        torch._C._log_api_usage_once(
+            "torchao.quantization.qat.Int4WeightOnlyEmbeddingQATQuantizer"
+        )
         self.bit_width = 4
         self.group_size: int = group_size
         self.scale_precision: torch.dtype = scale_precision
diff --git a/torchao/quantization/qat/fake_quantizer.py b/torchao/quantization/qat/fake_quantizer.py
index b63dbdb309..5af6cc4f51 100644
--- a/torchao/quantization/qat/fake_quantizer.py
+++ b/torchao/quantization/qat/fake_quantizer.py
@@ -66,6 +66,7 @@ class IntxFakeQuantizer(FakeQuantizerBase):
 
     def __init__(self, config: IntxFakeQuantizeConfig):
         super().__init__()
+        torch._C._log_api_usage_once("torchao.quantization.qat.FakeQuantizer")
         self.config = config
         self.enabled = True
         self.scale: Optional[torch.Tensor] = None
diff --git a/torchao/quantization/qat/linear.py b/torchao/quantization/qat/linear.py
index f94ec6f272..35fa980e1b 100644
--- a/torchao/quantization/qat/linear.py
+++ b/torchao/quantization/qat/linear.py
@@ -81,6 +81,7 @@ def __init__(
            *args,
            **kwargs,
         )
+        torch._C._log_api_usage_once("torchao.quantization.qat.FakeQuantizedLinear")
         # initialize activation fake quantizer
         if activation_config is not None:
             self.activation_fake_quantizer = FakeQuantizerBase.from_config(
@@ -210,6 +211,9 @@ def __init__(
         scales_precision: torch.dtype = torch.float32,
     ) -> None:
         super().__init__()
+        torch._C._log_api_usage_once(
+            "torchao.quantization.qat.Int8DynActInt4WeightQATQuantizer"
+        )
         self.groupsize: int = groupsize
         self.padding_allowed: bool = padding_allowed
         self.precision: torch.dtype = precision
@@ -413,6 +417,9 @@ def __init__(
         scales_precision: torch.dtype = torch.bfloat16,
     ) -> None:
         super().__init__()
+        torch._C._log_api_usage_once(
+            "torchao.quantization.qat.Int4WeightOnlyQATQuantizer"
+        )
         assert inner_k_tiles in [2, 4, 8]
         assert groupsize in [32, 64, 128, 256]
         self.inner_k_tiles = inner_k_tiles
@@ -594,6 +601,9 @@ def __init__(
         group_size: Optional[int] = 64,
         scale_precision: torch.dtype = torch.bfloat16,
     ):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.qat.Float8ActInt4WeightQATQuantizer"
+        )
         if group_size is not None:
             weight_granularity = "per_group"
         else:
diff --git a/torchao/quantization/quant_api.py b/torchao/quantization/quant_api.py
index fea37376a5..8d84182bdc 100644
--- a/torchao/quantization/quant_api.py
+++ b/torchao/quantization/quant_api.py
@@ -127,6 +127,7 @@
 logger = logging.getLogger(__name__)
 
+# TODO: revisit this list?
 __all__ = [
     "swap_conv2d_1x1_to_linear",
     "Quantizer",
@@ -510,6 +511,8 @@ def quantize_(
         quantize_(m, int4_weight_only(group_size=32))
 
     """
+    torch._C._log_api_usage_once("torchao.quantization.quantize_")
+
     filter_fn = _is_linear if filter_fn is None else filter_fn
 
     if isinstance(config, ModuleFqnToConfig):
@@ -619,6 +622,11 @@ class Int8DynamicActivationInt4WeightConfig(AOBaseConfig):
     act_mapping_type: MappingType = MappingType.ASYMMETRIC
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Int8DynamicActivationInt4WeightConfig"
+        )
+
 
 # for BC
 int8_dynamic_activation_int4_weight = Int8DynamicActivationInt4WeightConfig
@@ -729,6 +737,9 @@ class Int8DynamicActivationIntxWeightConfig(AOBaseConfig):
     layout: Layout = QDQLayout()
 
     def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Int8DynamicActivationIntxWeightConfig"
+        )
         assert self.weight_dtype in [getattr(torch, f"int{b}") for b in range(1, 9)], (
             f"weight_dtype must be torch.intx, where 1 <= x <= 8, but got {self.weight_dtype}"
         )
@@ -876,6 +887,11 @@ class Int4DynamicActivationInt4WeightConfig(AOBaseConfig):
     act_mapping_type: MappingType = MappingType.SYMMETRIC
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Int4DynamicActivationInt4WeightConfig"
+        )
+
 
 # for bc
 int4_dynamic_activation_int4_weight = Int4DynamicActivationInt4WeightConfig
@@ -932,6 +948,11 @@ class GemliteUIntXWeightOnlyConfig(AOBaseConfig):
     mode: Optional[str] = "weight_only"
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.GemliteUIntXWeightOnlyConfig"
+        )
+
 
 # for BC
 gemlite_uintx_weight_only = GemliteUIntXWeightOnlyConfig
@@ -1005,6 +1026,9 @@ class Int4WeightOnlyConfig(AOBaseConfig):
     packing_format: PackingFormat = PackingFormat.PLAIN
     VERSION: int = 1
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.Int4WeightOnlyConfig")
+
 
 # for BC
 # TODO maybe change other callsites
@@ -1178,6 +1202,9 @@ class Int8WeightOnlyConfig(AOBaseConfig):
     group_size: Optional[int] = None
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.Int8WeightOnlyConfig")
+
 
 # for BC
 int8_weight_only = Int8WeightOnlyConfig
@@ -1334,6 +1361,11 @@ class Int8DynamicActivationInt8WeightConfig(AOBaseConfig):
     weight_only_decode: bool = False
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Int8DynamicActivationInt8WeightConfig"
+        )
+
 
 # for BC
 int8_dynamic_activation_int8_weight = Int8DynamicActivationInt8WeightConfig
@@ -1438,6 +1470,9 @@ class Float8WeightOnlyConfig(AOBaseConfig):
     set_inductor_config: bool = True
     version: int = 2
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.Float8WeightOnlyConfig")
+
 
 # for BC
 float8_weight_only = Float8WeightOnlyConfig
@@ -1586,9 +1621,11 @@ class Float8DynamicActivationFloat8WeightConfig(AOBaseConfig):
     version: int = 2
 
     def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Float8DynamicActivationFloat8WeightConfig"
+        )
         if self.mm_config is None:
             self.mm_config = Float8MMConfig(use_fast_accum=True)
-
         activation_granularity, weight_granularity = _normalize_granularity(
             self.granularity
         )
@@ -1705,6 +1742,11 @@ class Float8DynamicActivationFloat8SemiSparseWeightConfig(AOBaseConfig):
     activation_dtype: torch.dtype = e5m2_dtype
     weight_dtype: torch.dtype = e4m3_dtype
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Float8DynamicActivationFloat8SemiSparseWeightConfig"
+        )
+
 
 @register_quantize_module_handler(Float8DynamicActivationFloat8SemiSparseWeightConfig)
 def _float8_dynamic_activation_float8_semi_sparse_weight_transform(
@@ -1756,6 +1798,11 @@ class Float8StaticActivationFloat8WeightConfig(AOBaseConfig):
     mm_config: Optional[Float8MMConfig] = Float8MMConfig(use_fast_accum=True)
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(
+            "torchao.quantization.Float8StaticActivationFloat8WeightConfig"
+        )
+
 
 # for bc
 float8_static_activation_float8_weight = Float8StaticActivationFloat8WeightConfig
@@ -1836,6 +1883,9 @@ class UIntXWeightOnlyConfig(AOBaseConfig):
     use_hqq: bool = False
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.UIntXWeightOnlyConfig")
+
 
 # for BC
 uintx_weight_only = UIntXWeightOnlyConfig
@@ -1934,6 +1984,7 @@ class IntxWeightOnlyConfig(AOBaseConfig):
     layout: Layout = QDQLayout()
 
     def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.IntxWeightOnlyConfig")
         assert self.weight_dtype in [getattr(torch, f"int{b}") for b in range(1, 9)], (
             f"weight_dtype must be torch.intx, where 1 <= x <= 8, but got {self.weight_dtype}"
         )
@@ -2007,6 +2058,9 @@ class FPXWeightOnlyConfig(AOBaseConfig):
     mbits: int
     set_inductor_config: bool = True
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.FPXWeightOnlyConfig")
+
 
 # for BC
 fpx_weight_only = FPXWeightOnlyConfig
@@ -2138,6 +2192,9 @@ class ModuleFqnToConfig(AOBaseConfig):
         default_factory=dict
     )
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.quantization.ModuleFqnToConfig")
+
 
 def _module_fqn_to_config_handler(
     module: torch.nn.Module, module_fqn: str, config: ModuleFqnToConfig
diff --git a/torchao/sparsity/sparse_api.py b/torchao/sparsity/sparse_api.py
index b263b5e098..f0d3183e35 100644
--- a/torchao/sparsity/sparse_api.py
+++ b/torchao/sparsity/sparse_api.py
@@ -50,6 +50,9 @@ def apply_fake_sparsity(model, **kwargs):
 class BlockSparseWeightConfig(AOBaseConfig):
     blocksize: int = 64
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.sparsity.BlockSparseWeightConfig")
+
 
 # for bc
 block_sparse_weight = BlockSparseWeightConfig
@@ -72,7 +75,8 @@ class SemiSparseWeightConfig(AOBaseConfig):
     Configuration for converting the weight of linear modules to semi-structured (2:4) sparsity
     """
 
-    pass
+    def __post_init__(self):
+        torch._C._log_api_usage_once("torchao.sparsity.SemiSparseWeightConfig")
 
 
 # for bc
@@ -127,6 +131,7 @@ def filter_fn(module: nn.Module, fqn: str) -> bool:
         from torchao.dtypes import SemiSparseLayout
         m = quantize_(m, int8_dynamic_activation_int8_weight(layout=SemiSparseLayout), filter_fn)
     """
+    torch._C._log_api_usage_once("torchao.sparsity.sparsify_")
     handler = _QUANTIZE_CONFIG_HANDLER[type(config)]
     _replace_with_custom_fn_if_matches_filter(
        model,
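Note on the telemetry hook used throughout this patch: `torch._C._log_api_usage_once(key)` records each key at most once per process, and in stock PyTorch it is effectively a no-op unless the hosting application registers an API-usage logger on the C++ side (`c10::SetAPIUsageLogger`) or `PYTORCH_API_USAGE_STDERR=1` is set, so the added calls carry no meaningful overhead. A minimal sketch of how the new keys fire is below; the tiny model and the choice of `Int8WeightOnlyConfig` are illustrative only, and any torchao entry point touched by this patch logs its own key in the same way.

```python
import torch
import torch.nn as nn

from torchao.quantization import Int8WeightOnlyConfig, quantize_

# Constructing a config and calling quantize_ each emit one API-usage event
# ("torchao.quantization.Int8WeightOnlyConfig" and
# "torchao.quantization.quantize_"). Running with PYTORCH_API_USAGE_STDERR=1
# prints the recorded keys to stderr, which is a quick way to check the
# instrumentation end to end.
model = nn.Sequential(nn.Linear(64, 64))
quantize_(model, Int8WeightOnlyConfig())

# The hook can also be invoked directly; repeated calls with the same key
# are deduplicated for the lifetime of the process.
torch._C._log_api_usage_once("torchao.quantization.quantize_")
```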