Skip to content

Commit c23f388

Browse files
committed
Unit test of counter table's checkpoint.
1 parent 1c287be commit c23f388

File tree

5 files changed

+140
-39
lines changed

5 files changed

+140
-39
lines changed

corelib/dynamicemb/dynamicemb/batched_dynamicemb_tables.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ def find_files(root_path: str, table_name: str, suffix: str) -> Tuple[List[str],
141141
"emb_values": partial(encode_checkpoint_file_path, item="values"),
142142
"emb_scores": partial(encode_checkpoint_file_path, item="scores"),
143143
"opt_values": partial(encode_checkpoint_file_path, item="opt_values"),
144+
"counter_keys": partial(encode_counter_checkpoint_file_path, item="keys"),
145+
"counter_frequencies": partial(
146+
encode_counter_checkpoint_file_path, item="frequencies"
147+
),
144148
}
145149
if suffix not in suffix_to_encode_file_path_func:
146150
raise RuntimeError(f"Invalid suffix: {suffix}")
@@ -1232,6 +1236,7 @@ def dump(
12321236
self,
12331237
save_dir: str,
12341238
optim: bool = False,
1239+
counter: bool = False,
12351240
table_names: Optional[List[str]] = None,
12361241
pg: Optional[dist.ProcessGroup] = None,
12371242
) -> None:
@@ -1245,7 +1250,7 @@ def dump(
12451250
world_size = dist.get_world_size(group=pg)
12461251

12471252
self.flush()
1248-
for table_name, storage, counter in zip(
1253+
for table_name, storage, counter_table in zip(
12491254
self._table_names, self._storages, self._admission_counter
12501255
):
12511256
if table_name not in set(table_names):
@@ -1278,20 +1283,28 @@ def dump(
12781283
include_meta=(rank == 0),
12791284
)
12801285

1286+
if not counter:
1287+
continue
1288+
12811289
counter_key_path = encode_counter_checkpoint_file_path(
12821290
save_dir, table_name, rank, world_size, "keys"
12831291
)
12841292
counter_frequency_path = encode_counter_checkpoint_file_path(
12851293
save_dir, table_name, rank, world_size, "frequencies"
12861294
)
12871295

1288-
if counter is not None:
1289-
counter.dump(counter_key_path, counter_frequency_path)
1296+
if counter_table is not None:
1297+
counter_table.dump(counter_key_path, counter_frequency_path)
1298+
else:
1299+
warnings.warn(
1300+
f"Counter table is none and will not dump it for table: {table_name}"
1301+
)
12901302

12911303
def load(
12921304
self,
12931305
save_dir: str,
12941306
optim: bool = False,
1307+
counter: bool = False,
12951308
table_names: Optional[List[str]] = None,
12961309
pg: Optional[dist.ProcessGroup] = None,
12971310
):
@@ -1305,7 +1318,7 @@ def load(
13051318
rank = dist.get_rank(group=pg)
13061319
world_size = dist.get_world_size(group=pg)
13071320

1308-
for table_name, storage, counter in zip(
1321+
for table_name, storage, counter_table in zip(
13091322
self._table_names, self._storages, self._admission_counter
13101323
):
13111324
if table_name not in set(table_names):
@@ -1338,11 +1351,16 @@ def load(
13381351
include_optim=optim,
13391352
)
13401353

1341-
if counter is None:
1354+
if not counter:
1355+
continue
1356+
if counter_table is None:
1357+
warnings.warn(
1358+
f"Counter table is none and will not load for table: {table_name}"
1359+
)
13421360
continue
13431361
num_counter_key_files = len(counter_key_files)
13441362
for i in range(num_counter_key_files):
1345-
counter.load(counter_key_files[i], counter_frequency_files[i])
1363+
counter_table.load(counter_key_files[i], counter_frequency_files[i])
13461364

13471365
def export_keys_values(
13481366
self, table_name: str, device: torch.device, batch_size: int = 65536

corelib/dynamicemb/dynamicemb/dump_load.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def DynamicEmbDump(
9393
model: nn.Module,
9494
table_names: Optional[Dict[str, List[str]]] = None,
9595
optim: Optional[bool] = False,
96+
counter: Optional[bool] = False,
9697
pg: dist.ProcessGroup = dist.group.WORLD,
9798
allow_overwrite: bool = False,
9899
) -> None:
@@ -115,6 +116,8 @@ def DynamicEmbDump(
115116
and the value is a list of dynamic embedding table names within that collection. Defaults to None.
116117
optim : Optional[bool], optional
117118
Whether to dump the optimizer states. Defaults to False.
119+
counter : Optional[bool], optional
120+
Whether to dump the embedding admission counter table. Defaults to False.
118121
pg : Optional[dist.ProcessGroup], optional
119122
The process group used to control the communication scope in the dump. Defaults to None.
120123
@@ -175,6 +178,7 @@ def DynamicEmbDump(
175178
dynamic_emb_module.dump(
176179
full_collection_path,
177180
optim=optim,
181+
counter=counter,
178182
table_names=table_names_to_dump,
179183
pg=pg,
180184
)
@@ -197,6 +201,7 @@ def DynamicEmbLoad(
197201
model: nn.Module,
198202
table_names: Optional[List[str]] = None,
199203
optim: bool = False,
204+
counter: bool = False,
200205
pg: dist.ProcessGroup = dist.group.WORLD,
201206
):
202207
"""
@@ -216,6 +221,8 @@ def DynamicEmbLoad(
216221
and the value is a list of dynamic embedding table names within that collection. Defaults to None.
217222
optim : bool, optional
218223
Whether to load the optimizer states. Defaults to False.
224+
counter : bool, optional
225+
Whether to load the embedding admission counter table. Defaults to False.
219226
pg : Optional[dist.ProcessGroup], optional
220227
The process group used to control the communication scope in the load. Defaults to None.
221228
@@ -257,6 +264,7 @@ def DynamicEmbLoad(
257264
dynamic_emb_module.load(
258265
full_collection_path,
259266
optim=optim,
267+
counter=counter,
260268
table_names=table_names_to_load,
261269
pg=pg,
262270
)

corelib/dynamicemb/dynamicemb/embedding_admission.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def dump(self, key_file, counter_file) -> None:
106106
key_file (str): the file path of keys.
107107
counter_file (str): the file path of frequencies.
108108
"""
109+
print(f"Counter size: {self.table_.size()}")
109110
self.table_.dump(key_file, {self.score_name_: counter_file})
110111

111112

corelib/dynamicemb/test/unit_tests/test_embedding_dump_load.py

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,11 @@
2424
import torch
2525
import torch.distributed as dist
2626
import torch.nn as nn
27-
from dynamicemb import DynamicEmbScoreStrategy, DynamicEmbTableOptions
27+
from dynamicemb import (
28+
DynamicEmbScoreStrategy,
29+
DynamicEmbTableOptions,
30+
FrequencyAdmissionStrategy,
31+
)
2832
from dynamicemb.dump_load import (
2933
DynamicEmbDump,
3034
DynamicEmbLoad,
@@ -38,6 +42,7 @@
3842
from dynamicemb.embedding_admission import KVCounter
3943
from dynamicemb.get_planner import get_planner
4044
from dynamicemb.key_value_table import batched_export_keys_values
45+
from dynamicemb.scored_hashtable import ScoreArg, ScorePolicy
4146
from dynamicemb.shard import DynamicEmbeddingCollectionSharder
4247
from dynamicemb.types import AdmissionStrategy
4348
from dynamicemb.utils import TORCHREC_TYPES
@@ -318,6 +323,40 @@ def create_model(
318323
return model
319324

320325

326+
def check_counter_table_checkpoint(x, y):
327+
device = torch.cuda.current_device()
328+
tables_x = get_dynamic_emb_module(x)
329+
tables_y = get_dynamic_emb_module(y)
330+
331+
for table_x, table_y in zip(tables_x, tables_y):
332+
for cnt_tx, cnt_ty in zip(table_x, table_y):
333+
assert cnt_tx.table_.size() == cnt_ty.table_.size()
334+
335+
for keys, named_scores in cnt_tx._batched_export_keys_scores(
336+
cnt_tx.table_.score_names_, torch.device(f"cuda:{device}")
337+
):
338+
if keys.numel() == 0:
339+
continue
340+
freq_name = cnt_tx.table_.score_names_[0]
341+
frequencies = named_scores[freq_name]
342+
343+
score_args_lookup = [
344+
ScoreArg(
345+
name=freq_name,
346+
value=torch.zeros_like(frequencies),
347+
policy=ScorePolicy.CONST,
348+
is_return=True,
349+
)
350+
]
351+
founds = torch.empty(
352+
keys.numel(), dtype=torch.bool, device=device
353+
).fill_(False)
354+
355+
cnt_ty.lookup(keys, score_args_lookup, founds)
356+
357+
assert torch.equal(frequencies, score_args_lookup)
358+
359+
321360
@click.command()
322361
@click.option("--num-embedding-collections", type=int, required=True)
323362
@click.option("--num-embeddings", type=str, required=True)
@@ -336,6 +375,7 @@ def create_model(
336375
required=True,
337376
)
338377
@click.option("--optim", type=bool, required=True)
378+
@click.option("--counter", type=bool, required=True)
339379
def test_model_load_dump(
340380
num_embedding_collections: int,
341381
num_embeddings: str,
@@ -346,6 +386,7 @@ def test_model_load_dump(
346386
mode: str,
347387
save_path: str,
348388
optim: bool,
389+
counter: bool,
349390
batch_size: int = 128,
350391
num_iterations: int = 10,
351392
):
@@ -367,6 +408,9 @@ def test_model_load_dump(
367408
embedding_dim=embedding_dim,
368409
optimizer_kwargs=optimizer_kwargs,
369410
score_strategy=score_strategy_,
411+
admit_strategy=FrequencyAdmissionStrategy(
412+
threshold=2 if counter else 1,
413+
),
370414
)
371415

372416
kjts, feature_names, all_kjts = generate_sparse_feature(
@@ -388,7 +432,7 @@ def test_model_load_dump(
388432

389433
if mode == "dump":
390434
shutil.rmtree(save_path, ignore_errors=True)
391-
DynamicEmbDump(save_path, ref_model, optim=optim)
435+
DynamicEmbDump(save_path, ref_model, optim=optim, counter=counter)
392436

393437
if mode == "load":
394438
model = create_model(
@@ -397,16 +441,24 @@ def test_model_load_dump(
397441
embedding_dim=embedding_dim,
398442
optimizer_kwargs=optimizer_kwargs,
399443
score_strategy=score_strategy_,
444+
admit_strategy=FrequencyAdmissionStrategy(
445+
threshold=2 if counter else 1,
446+
),
400447
)
401448

402-
DynamicEmbLoad(save_path, model, optim=optim)
449+
DynamicEmbLoad(save_path, model, optim=optim, counter=counter)
450+
451+
if counter:
452+
check_counter_table_checkpoint(model, ref_model)
403453

404454
table_name_to_key_score_dict = {}
405455
for _, _, sharded_module in find_sharded_modules(model):
406456
dynamic_emb_modules = get_dynamic_emb_module(sharded_module)
407457
for dynamic_emb_module in dynamic_emb_modules:
408-
for table_name, table in zip(
409-
dynamic_emb_module.table_names, dynamic_emb_module.tables
458+
for table_name, table, counter_table in zip(
459+
dynamic_emb_module.table_names,
460+
dynamic_emb_module.tables,
461+
dynamic_emb_module._admission_counter,
410462
):
411463
key_to_score = {}
412464
for batched_key, _, _, batched_score in batched_export_keys_values(
@@ -416,6 +468,21 @@ def test_model_load_dump(
416468
batched_key.tolist(), batched_score.tolist()
417469
):
418470
key_to_score[key] = score
471+
472+
for (
473+
keys,
474+
named_scores,
475+
) in counter_table.table_._batched_export_keys_scores(
476+
counter_table.table_.score_names_, torch.device(f"cpu")
477+
):
478+
if keys.numel() == 0:
479+
continue
480+
freq_name = counter_table.table_.score_names_[0]
481+
frequencies = named_scores[freq_name]
482+
483+
for key, score in zip(keys.tolist(), frequencies.tolist()):
484+
key_to_score[key] = score
485+
419486
table_name_to_key_score_dict[table_name] = key_to_score
420487

421488
for embedding_collection_idx, embedding_idx in product(

corelib/dynamicemb/test/unit_tests/test_embedding_dump_load.sh

Lines changed: 35 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,45 +8,24 @@ NUM_GPUS=(1 4)
88
OPTIMIZER_TYPE=("adam" "sgd" "adagrad" "rowwise_adagrad")
99
INCLUDE_OPTIM=("True" "False")
1010
SCORE_STRATEGY=("timestamp" "lfu" "step")
11+
INCLUDE_COUNTER=("True" "False")
1112

1213
for num_gpus in ${NUM_GPUS[@]}; do
1314
for optimizer_type in ${OPTIMIZER_TYPE[@]}; do
1415
for include_optim in ${INCLUDE_OPTIM[@]}; do
15-
for score_strategy in ${SCORE_STRATEGY[@]}; do
16-
echo "num_gpus: $num_gpus, optimizer_type: $optimizer_type, include_optim: $include_optim, score_strategy: $score_strategy"
17-
torchrun \
18-
--nnodes 1 \
19-
--nproc_per_node $num_gpus \
20-
./test/unit_tests/test_embedding_dump_load.py \
21-
--optimizer-type ${optimizer_type} \
22-
--score-strategy ${score_strategy} \
23-
--mode "dump" \
24-
--optim ${include_optim} \
25-
--save-path "debug_weight_${optimizer_type}_${num_gpus}_${include_optim}_${score_strategy}" \
26-
--num-embedding-collections $NUM_EMBEDDING_COLLECTIONS \
27-
--num-embeddings $NUM_EMBEDDINGS \
28-
--multi-hot-sizes $MULTI_HOT_SIZES \
29-
--embedding-dim 16 || exit 1
30-
done
31-
done
32-
done
33-
done
34-
35-
for num_load_gpus in ${NUM_GPUS[@]}; do
36-
for num_dump_gpus in ${NUM_GPUS[@]}; do
37-
for optimizer_type in ${OPTIMIZER_TYPE[@]}; do
38-
for include_optim in ${INCLUDE_OPTIM[@]}; do
16+
for include_counter in ${INCLUDE_COUNTER[@]}; do
3917
for score_strategy in ${SCORE_STRATEGY[@]}; do
40-
echo "num_load_gpus: $num_load_gpus, num_dump_gpus: $num_dump_gpus, optimizer_type: $optimizer_type, include_optim: $include_optim, score_strategy: $score_strategy"
18+
echo "num_gpus: $num_gpus, optimizer_type: $optimizer_type, include_optim: $include_optim, include_counter: $include_counter, score_strategy: $score_strategy"
4119
torchrun \
4220
--nnodes 1 \
43-
--nproc_per_node $num_load_gpus \
21+
--nproc_per_node $num_gpus \
4422
./test/unit_tests/test_embedding_dump_load.py \
4523
--optimizer-type ${optimizer_type} \
4624
--score-strategy ${score_strategy} \
47-
--mode "load" \
25+
--mode "dump" \
4826
--optim ${include_optim} \
49-
--save-path "debug_weight_${optimizer_type}_${num_dump_gpus}_${include_optim}_${score_strategy}" \
27+
--counter ${include_counter} \
28+
--save-path "debug_weight_${optimizer_type}_${num_gpus}_${include_optim}_${include_counter}_${score_strategy}" \
5029
--num-embedding-collections $NUM_EMBEDDING_COLLECTIONS \
5130
--num-embeddings $NUM_EMBEDDINGS \
5231
--multi-hot-sizes $MULTI_HOT_SIZES \
@@ -55,4 +34,32 @@ for num_load_gpus in ${NUM_GPUS[@]}; do
5534
done
5635
done
5736
done
37+
done
38+
39+
for num_load_gpus in ${NUM_GPUS[@]}; do
40+
for num_dump_gpus in ${NUM_GPUS[@]}; do
41+
for optimizer_type in ${OPTIMIZER_TYPE[@]}; do
42+
for include_optim in ${INCLUDE_OPTIM[@]}; do
43+
for include_counter in ${INCLUDE_COUNTER[@]}; do
44+
for score_strategy in ${SCORE_STRATEGY[@]}; do
45+
echo "num_load_gpus: $num_load_gpus, num_dump_gpus: $num_dump_gpus, optimizer_type: $optimizer_type, include_optim: $include_optim, include_counter: $include_counter, score_strategy: $score_strategy"
46+
torchrun \
47+
--nnodes 1 \
48+
--nproc_per_node $num_load_gpus \
49+
./test/unit_tests/test_embedding_dump_load.py \
50+
--optimizer-type ${optimizer_type} \
51+
--score-strategy ${score_strategy} \
52+
--mode "load" \
53+
--optim ${include_optim} \
54+
--counter ${include_counter} \
55+
--save-path "debug_weight_${optimizer_type}_${num_dump_gpus}_${include_optim}_${include_counter}_${score_strategy}" \
56+
--num-embedding-collections $NUM_EMBEDDING_COLLECTIONS \
57+
--num-embeddings $NUM_EMBEDDINGS \
58+
--multi-hot-sizes $MULTI_HOT_SIZES \
59+
--embedding-dim 16 || exit 1
60+
done
61+
done
62+
done
63+
done
64+
done
5865
done

0 commit comments

Comments (0)