fixing circular import

HDCharles · HDCharles · commit 20371993f065 · 2024-02-09T15:08:39.000-08:00
Summary: The import cycle was generate.py -> tp.py -> quantize.py -> eval.py -> generate.py. I removed the link between generate.py and tp.py by deferring that import until runtime, when quantize.py will be fully initialized. Note that the try/except is still needed for the lm_eval imports in case lm_eval is not installed. Also eliminated issues with tasks being initialized multiple times when the new line in generate.py is hit. Test Plan: (with lm_eval 0.3 / 0.4 / not installed) python quantize.py --mode int8 Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: b3efa96 Pull Request resolved: #97
diff --git a/eval.py b/eval.py
@@ -35,7 +35,6 @@
         from lm_eval.models.huggingface import HFLM as eval_wrapper
         from lm_eval.tasks import get_task_dict
         from lm_eval.evaluator import evaluate
-        lm_eval.tasks.initialize_tasks()
     except: #lm_eval version 0.3
         from lm_eval import base
         from lm_eval import tasks
@@ -179,6 +178,11 @@ def eval(
         max_seq_length,
     )
 
+    try:
+        lm_eval.tasks.initialize_tasks()
+    except:
+        pass
+
     if 'hendrycks_test' in tasks:
         tasks.remove('hendrycks_test')
         tasks += [x for x in lm_eval.tasks.hendrycks_test.create_all_tasks().keys()]
diff --git a/generate.py b/generate.py
@@ -34,7 +34,6 @@ def device_sync(device):
 from sentencepiece import SentencePieceProcessor
 
 from model import Transformer
-from tp import maybe_init_dist
 
 
 def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
@@ -268,6 +267,7 @@ def main(
     assert tokenizer_path.is_file(), tokenizer_path
 
     global print
+    from tp import maybe_init_dist
     rank = maybe_init_dist()
     use_tp = rank is not None
     if use_tp:
diff --git a/quantize.py b/quantize.py
@@ -13,7 +13,7 @@
 
 try:
     from GPTQ import GenericGPTQRunner, InputRecorder
-    from eval import get_task_dict, evaluate
+    from eval import get_task_dict, evaluate, lm_eval
 except:
     pass
 
@@ -249,8 +249,14 @@ def get_inputs(model, tokenizer, calibration_tasks, calibration_limit, calibrati
             calibration_seq_length,
             pad_calibration_inputs,
         )
+
+        try:
+            lm_eval.tasks.initialize_tasks()
+        except:
+            pass
         task_dict = get_task_dict(calibration_tasks)
         print("Obtaining GPTQ calibration inputs on: ", calibration_tasks)
+
         evaluate(
             input_recorder,
             task_dict,