diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py
index 5dc48821adc..d00c34b7c55 100644
--- a/python/sglang/srt/layers/activation.py
+++ b/python/sglang/srt/layers/activation.py
@@ -380,4 +380,30 @@ def get_cross_encoder_activation_function(config: PretrainedConfig):
     logger.info(
         "sgl-kernel is not available on Non-NV, Non-AMD platforms or Non-AMX CPUs. Fallback to other kernel libraries."
     )
-    from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul
+    try:
+        from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul
+    except ImportError:
+        # vllm is optional on these platforms: keep this module importable
+        # without it, and make any use of a fallback activation fail with a
+        # clear message rather than "object is not callable".
+
+        class _MissingVllmActivation:
+            """Placeholder for a vllm activation that could not be imported."""
+
+            def __init__(self, *args, **kwargs):
+                # Accept and ignore any constructor arguments.
+                pass
+
+            def forward(self, *args, **kwargs):
+                raise ImportError(
+                    f"{type(self).__name__} requires vllm, which could not be "
+                    "imported on this platform."
+                )
+
+            __call__ = forward
+
+        class GeluAndMul(_MissingVllmActivation):
+            """Stand-in for vllm's GeluAndMul when vllm is unavailable."""
+
+        class SiluAndMul(_MissingVllmActivation):
+            """Stand-in for vllm's SiluAndMul when vllm is unavailable."""
diff --git a/python/sglang/utils.py b/python/sglang/utils.py
index 1d62c5df854..f34c6fa1b9a 100644
--- a/python/sglang/utils.py
+++ b/python/sglang/utils.py
@@ -14,7 +14,7 @@
 import urllib.request
 import weakref
 from concurrent.futures import ThreadPoolExecutor
-from functools import wraps
+from functools import lru_cache, wraps
 from io import BytesIO
 from json import dumps
 from typing import Any, Callable, List, Optional, Tuple, Type, Union
@@ -542,6 +542,7 @@ async def async_stream_and_merge(llm, prompt, sampling_params):
         yield cleaned_chunk  # yield the non-overlapping portion
 
 
+@lru_cache(maxsize=128)
 def resolve_obj_by_qualname(qualname: str) -> Any:
     """
     Resolve an object by its fully qualified name.