Fix cuda context bug caused by PreTrainedModel (#113)

VibhuJawa · web-flow · commit 770d26108f8d · 2025-03-14T10:22:34.000-07:00
* Fix cuda context bug caused by PreTrainedModel

Signed-off-by: Vibhu Jawa <vjawa@nvidia.com>

* Flake8 style fixes

Signed-off-by: Vibhu Jawa <vjawa@nvidia.com>

---------

Signed-off-by: Vibhu Jawa <vjawa@nvidia.com>
diff --git a/crossfit/backend/torch/hf/memory_curve_utils.py b/crossfit/backend/torch/hf/memory_curve_utils.py
@@ -12,13 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
 
 import joblib
 import numpy as np
 import torch
+import transformers
 from sklearn.linear_model import LinearRegression
 from tqdm import tqdm
-from transformers import PreTrainedModel
 
 from crossfit.utils.model_adapter import adapt_model_input
 from crossfit.utils.torch_utils import (
@@ -29,7 +30,7 @@
 
 
 def fit_memory_estimate_curve(
-    model: PreTrainedModel,
+    model: "transformers.PreTrainedModel",
     path_or_name: str,
     start_batch_size: int = 1,
     end_batch_size: int = 2048,
diff --git a/crossfit/data/sparse/core.py b/crossfit/data/sparse/core.py
@@ -172,12 +172,12 @@ def to_pytrec(self, is_run=False):
 
         qrel = {}
         for i in range(self.indices.shape[0]):
-            query_id = f"q{i+1}"
+            query_id = f"q{i + 1}"
             qrel[query_id] = {}
 
             row = sparse_matrix[i]
             for j, score in zip(row.indices, row.data):
-                doc_id = f"d{j+1}"
+                doc_id = f"d{j + 1}"
                 qrel[query_id][doc_id] = int(score) if is_run else float(score)
 
         return qrel
diff --git a/examples/dask_aggregate_bench.py b/examples/dask_aggregate_bench.py
@@ -53,19 +53,15 @@
     columns = [f"I{i}" for i in range(1, ncolumns + 1)]
     if groupby:
         columns += groupby if isinstance(groupby, list) else [groupby]
-    ddf = dd.read_parquet(
-        path,
-        blocksize=blocksize,
-        columns=columns,
-    )
+    ddf = dd.read_parquet(path, blocksize=blocksize, columns=columns)
     print(f"\nddf: {ddf}\n")
 
     # Aggregate moments (mean, var, std)
     agg = cf.Aggregator(Moments(axis=0), per_column=True)
     t0 = time.time()
     result = aggregate(ddf, agg, to_frame=True)
     tf = time.time()
-    print(f"\nWall Time: {tf-t0} seconds\n")
+    print(f"\nWall Time: {tf - t0} seconds\n")
 
     # View result
     print(f"Result:\n{result}\n")
@@ -76,12 +72,12 @@
         t0 = time.time()
         std = ddf.groupby(groupby).std().compute()
         tf = time.time()
-        print(f"\nddf.groupby().std() takes {tf-t0} seconds, and returns:\n")
+        print(f"\nddf.groupby().std() takes {tf - t0} seconds, and returns:\n")
         print(f"\n{std}\n")
     else:
         # Compare to ddf.std()
         t0 = time.time()
         std = ddf.std().compute()
         tf = time.time()
-        print(f"\nddf.std() takes {tf-t0} seconds, and returns:\n")
+        print(f"\nddf.std() takes {tf - t0} seconds, and returns:\n")
         print(f"\n{std}\n")
diff --git a/tests/pytrec_utils.py b/tests/pytrec_utils.py
@@ -24,13 +24,13 @@ def create_qrel(relevance_scores, ids=None):
 
     qrel = {}
     for i, query_scores in enumerate(relevance_scores):
-        query_id = ids[i] if ids is not None else f"q{i+1}"
+        query_id = ids[i] if ids is not None else f"q{i + 1}"
         qrel[query_id] = {}
         for j, score in enumerate(query_scores):
             _score = int(score.item())
 
             if _score > 0:
-                doc_id = f"d{j+1}"
+                doc_id = f"d{j + 1}"
                 qrel[query_id][doc_id] = int(score.item())
 
     return qrel
@@ -41,10 +41,10 @@ def create_run(predicted_scores, ids=None):
 
     run = {}
     for i, query_scores in enumerate(predicted_scores):
-        query_id = ids[i] if ids is not None else f"q{i+1}"
+        query_id = ids[i] if ids is not None else f"q{i + 1}"
         run[query_id] = {}
         for j, score in enumerate(query_scores):
-            doc_id = f"d{j+1}"
+            doc_id = f"d{j + 1}"
             run[query_id][doc_id] = float(score.item())
 
     return run
@@ -60,6 +60,6 @@ def create_results(metric_arrays):
         for k, v in metric_arrays.items():
             q_out[k] = float(v[i])
 
-        outputs[f"q{i+1}"] = q_out
+        outputs[f"q{i + 1}"] = q_out
 
     return outputs