Skip to content

Commit 3dab4b6

Browse files
committed
Support both transformers 4.55 and 4.57.0 using the same cache utils
Signed-off-by: Dipankar Sarkar <dipankar@qti.qualcomm.com>
1 parent 6598856 commit 3dab4b6

File tree

7 files changed

+347
-283
lines changed

7 files changed

+347
-283
lines changed

QEfficient/transformers/cache_utils.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
from typing import Any, Dict, List, Optional, Tuple
1111

1212
import torch
13+
import transformers
14+
from packaging import version
1315
from transformers.cache_utils import DynamicCache, DynamicLayer, EncoderDecoderCache, HybridCache, HybridChunkedCache
1416

1517
from QEfficient.customop import (
@@ -330,12 +332,17 @@ def __init__(
330332
layers = []
331333
# If a config is passed, use it to infer the layer types and initialize accordingly
332334
if len(layers) == 0:
333-
Cache.__init__(
334-
self,
335-
layer_class_to_replicate=QEffDynamicLayer,
336-
offloading=offloading,
337-
offload_only_non_sliding=offload_only_non_sliding,
338-
)
335+
# breakpoint()
336+
337+
if version.parse(transformers.__version__) < version.parse("4.57.0"):
338+
Cache.__init__(self, layer_classes=QEffDynamicLayer, *args, **kwargs)
339+
else:
340+
Cache.__init__(
341+
self,
342+
layer_class_to_replicate=QEffDynamicLayer,
343+
offloading=offloading,
344+
offload_only_non_sliding=offload_only_non_sliding,
345+
)
339346
else:
340347
Cache.__init__(
341348
self,

0 commit comments

Comments
 (0)